1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefixes=AVX,AVX512VPOPCNTDQ
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512VPOPCNTDQVL
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=BITALG_NOVLX
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=BITALG
; ugt_1_v16i8: takes the per-byte population count of %0 (llvm.ctpop.v16i8),
; compares every count unsigned-greater-than 1, and sign-extends the <16 x i1>
; result to <16 x i8> (see the IR at the end of this function).
; The SSE and AVX-family expectations below contain no popcount sequence: they
; add an all-ones vector to the input, AND the sum with the input, compare the
; result for equality with zero, and finally invert that mask (pxor with the
; all-ones register, or vpternlogq $15). Only the two BITALG configurations
; expect vpopcntb followed by vpcmpgtb against a constant-pool operand.
; NOTE(review): the `ret` and the closing brace are not visible in this
; excerpt; presumably %4 is the returned value - confirm against the full file.
14 define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
15 ; SSE-LABEL: ugt_1_v16i8:
17 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
18 ; SSE-NEXT: movdqa %xmm0, %xmm2
19 ; SSE-NEXT: paddb %xmm1, %xmm2
20 ; SSE-NEXT: pand %xmm2, %xmm0
21 ; SSE-NEXT: pxor %xmm2, %xmm2
22 ; SSE-NEXT: pcmpeqb %xmm2, %xmm0
23 ; SSE-NEXT: pxor %xmm1, %xmm0
26 ; AVX1-LABEL: ugt_1_v16i8:
28 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
29 ; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm2
30 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
31 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
32 ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
33 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
36 ; AVX2-LABEL: ugt_1_v16i8:
38 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
39 ; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm2
40 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
41 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
42 ; AVX2-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
43 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
46 ; AVX512VPOPCNTDQ-LABEL: ugt_1_v16i8:
47 ; AVX512VPOPCNTDQ: # %bb.0:
48 ; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
49 ; AVX512VPOPCNTDQ-NEXT: vpaddb %xmm1, %xmm0, %xmm1
50 ; AVX512VPOPCNTDQ-NEXT: vpand %xmm1, %xmm0, %xmm0
51 ; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
52 ; AVX512VPOPCNTDQ-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
53 ; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
54 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
55 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
56 ; AVX512VPOPCNTDQ-NEXT: retq
58 ; AVX512VPOPCNTDQVL-LABEL: ugt_1_v16i8:
59 ; AVX512VPOPCNTDQVL: # %bb.0:
60 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
61 ; AVX512VPOPCNTDQVL-NEXT: vpaddb %xmm1, %xmm0, %xmm1
62 ; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
63 ; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
64 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
65 ; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
66 ; AVX512VPOPCNTDQVL-NEXT: retq
68 ; BITALG_NOVLX-LABEL: ugt_1_v16i8:
69 ; BITALG_NOVLX: # %bb.0:
70 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
71 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
72 ; BITALG_NOVLX-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
73 ; BITALG_NOVLX-NEXT: vzeroupper
74 ; BITALG_NOVLX-NEXT: retq
76 ; BITALG-LABEL: ugt_1_v16i8:
78 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
79 ; BITALG-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
81 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
82 %3 = icmp ugt <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
83 %4 = sext <16 x i1> %3 to <16 x i8>
; ult_2_v16i8: takes the per-byte population count of %0 (llvm.ctpop.v16i8),
; compares every count unsigned-less-than 2, and sign-extends the <16 x i1>
; result to <16 x i8> (see the IR at the end of this function).
; The shared SSE and AVX expectations below contain no popcount sequence: they
; add an all-ones vector to the input, AND the sum with the input, and compare
; the result for equality with zero (no final inversion). The two BITALG
; configurations expect vpopcntb followed by vpcmpgtb with a broadcast
; constant 2 held in xmm1.
; NOTE(review): the `ret` and the closing brace are not visible in this
; excerpt; presumably %4 is the returned value - confirm against the full file.
87 define <16 x i8> @ult_2_v16i8(<16 x i8> %0) {
88 ; SSE-LABEL: ult_2_v16i8:
90 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
91 ; SSE-NEXT: paddb %xmm0, %xmm1
92 ; SSE-NEXT: pand %xmm1, %xmm0
93 ; SSE-NEXT: pxor %xmm1, %xmm1
94 ; SSE-NEXT: pcmpeqb %xmm1, %xmm0
97 ; AVX-LABEL: ult_2_v16i8:
99 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
100 ; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm1
101 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
102 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
103 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
106 ; BITALG_NOVLX-LABEL: ult_2_v16i8:
107 ; BITALG_NOVLX: # %bb.0:
108 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
109 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
110 ; BITALG_NOVLX-NEXT: vpbroadcastb {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
111 ; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
112 ; BITALG_NOVLX-NEXT: vzeroupper
113 ; BITALG_NOVLX-NEXT: retq
115 ; BITALG-LABEL: ult_2_v16i8:
117 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
118 ; BITALG-NEXT: vpbroadcastb {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
119 ; BITALG-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
121 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
122 %3 = icmp ult <16 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
123 %4 = sext <16 x i1> %3 to <16 x i8>
; ugt_2_v16i8: takes the per-byte population count of %0 (llvm.ctpop.v16i8),
; compares every count unsigned-greater-than 2, and sign-extends the <16 x i1>
; result to <16 x i8> (see the IR at the end of this function).
; Expected popcount lowering differs per configuration in the checks below:
;   - SSE2 / SSE3: a shift, mask, subtract and add sequence
;     (psrlw, pand, psubb, paddb).
;   - SSSE3 / SSE4.1 / AVX1 / AVX2: two pshufb lookups into the table
;     [0,1,1,2,...,4], indexed by the low nibble and by the nibble obtained
;     after a right shift by 4, with the two results added.
;   - AVX512VPOPCNTDQ (with and without VL): zero-extend bytes to dwords in a
;     zmm register, vpopcntd, then truncate back with vpmovdb.
;   - BITALG (with and without VL): a single vpopcntb.
; In every configuration the count is then compared with (v)pcmpgtb against a
; constant-pool operand.
; NOTE(review): the `ret` and the closing brace are not visible in this
; excerpt; presumably %4 is the returned value - confirm against the full file.
127 define <16 x i8> @ugt_2_v16i8(<16 x i8> %0) {
128 ; SSE2-LABEL: ugt_2_v16i8:
130 ; SSE2-NEXT: movdqa %xmm0, %xmm1
131 ; SSE2-NEXT: psrlw $1, %xmm1
132 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
133 ; SSE2-NEXT: psubb %xmm1, %xmm0
134 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
135 ; SSE2-NEXT: movdqa %xmm0, %xmm2
136 ; SSE2-NEXT: pand %xmm1, %xmm2
137 ; SSE2-NEXT: psrlw $2, %xmm0
138 ; SSE2-NEXT: pand %xmm1, %xmm0
139 ; SSE2-NEXT: paddb %xmm2, %xmm0
140 ; SSE2-NEXT: movdqa %xmm0, %xmm1
141 ; SSE2-NEXT: psrlw $4, %xmm1
142 ; SSE2-NEXT: paddb %xmm1, %xmm0
143 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
144 ; SSE2-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
147 ; SSE3-LABEL: ugt_2_v16i8:
149 ; SSE3-NEXT: movdqa %xmm0, %xmm1
150 ; SSE3-NEXT: psrlw $1, %xmm1
151 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
152 ; SSE3-NEXT: psubb %xmm1, %xmm0
153 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
154 ; SSE3-NEXT: movdqa %xmm0, %xmm2
155 ; SSE3-NEXT: pand %xmm1, %xmm2
156 ; SSE3-NEXT: psrlw $2, %xmm0
157 ; SSE3-NEXT: pand %xmm1, %xmm0
158 ; SSE3-NEXT: paddb %xmm2, %xmm0
159 ; SSE3-NEXT: movdqa %xmm0, %xmm1
160 ; SSE3-NEXT: psrlw $4, %xmm1
161 ; SSE3-NEXT: paddb %xmm1, %xmm0
162 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
163 ; SSE3-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
166 ; SSSE3-LABEL: ugt_2_v16i8:
168 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
169 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
170 ; SSSE3-NEXT: pand %xmm2, %xmm3
171 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
172 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
173 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
174 ; SSSE3-NEXT: psrlw $4, %xmm0
175 ; SSSE3-NEXT: pand %xmm2, %xmm0
176 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
177 ; SSSE3-NEXT: paddb %xmm4, %xmm1
178 ; SSSE3-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
179 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
182 ; SSE41-LABEL: ugt_2_v16i8:
184 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
185 ; SSE41-NEXT: movdqa %xmm0, %xmm3
186 ; SSE41-NEXT: pand %xmm2, %xmm3
187 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
188 ; SSE41-NEXT: movdqa %xmm1, %xmm4
189 ; SSE41-NEXT: pshufb %xmm3, %xmm4
190 ; SSE41-NEXT: psrlw $4, %xmm0
191 ; SSE41-NEXT: pand %xmm2, %xmm0
192 ; SSE41-NEXT: pshufb %xmm0, %xmm1
193 ; SSE41-NEXT: paddb %xmm4, %xmm1
194 ; SSE41-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
195 ; SSE41-NEXT: movdqa %xmm1, %xmm0
198 ; AVX1-LABEL: ugt_2_v16i8:
200 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
201 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
202 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
203 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
204 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
205 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
206 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
207 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
208 ; AVX1-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
211 ; AVX2-LABEL: ugt_2_v16i8:
213 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
214 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
215 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
216 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
217 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
218 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
219 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
220 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
221 ; AVX2-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
224 ; AVX512VPOPCNTDQ-LABEL: ugt_2_v16i8:
225 ; AVX512VPOPCNTDQ: # %bb.0:
226 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
227 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
228 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
229 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
230 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
231 ; AVX512VPOPCNTDQ-NEXT: retq
233 ; AVX512VPOPCNTDQVL-LABEL: ugt_2_v16i8:
234 ; AVX512VPOPCNTDQVL: # %bb.0:
235 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
236 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
237 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
238 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
239 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
240 ; AVX512VPOPCNTDQVL-NEXT: retq
242 ; BITALG_NOVLX-LABEL: ugt_2_v16i8:
243 ; BITALG_NOVLX: # %bb.0:
244 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
245 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
246 ; BITALG_NOVLX-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
247 ; BITALG_NOVLX-NEXT: vzeroupper
248 ; BITALG_NOVLX-NEXT: retq
250 ; BITALG-LABEL: ugt_2_v16i8:
252 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
253 ; BITALG-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
255 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
256 %3 = icmp ugt <16 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
257 %4 = sext <16 x i1> %3 to <16 x i8>
; ult_3_v16i8: takes the per-byte population count of %0 (llvm.ctpop.v16i8),
; compares every count unsigned-less-than 3, and sign-extends the <16 x i1>
; result to <16 x i8> (see the IR at the end of this function).
; Expected popcount lowering differs per configuration in the checks below:
;   - SSE2 / SSE3: a shift, mask, subtract and add sequence
;     (psrlw, pand, psubb, paddb).
;   - SSSE3 / SSE4.1 / AVX1 / AVX2: two pshufb lookups into the table
;     [0,1,1,2,...,4], indexed by the low nibble and by the nibble obtained
;     after a right shift by 4, with the two results added.
;   - AVX512VPOPCNTDQ (with and without VL): zero-extend bytes to dwords in a
;     zmm register, vpopcntd, then truncate back with vpmovdb.
;   - BITALG (with and without VL): a single vpopcntb.
; In every configuration a splat of 3 is then materialised in a register and
; (v)pcmpgtb is applied between that splat and the count.
; NOTE(review): the `ret` and the closing brace are not visible in this
; excerpt; presumably %4 is the returned value - confirm against the full file.
261 define <16 x i8> @ult_3_v16i8(<16 x i8> %0) {
262 ; SSE2-LABEL: ult_3_v16i8:
264 ; SSE2-NEXT: movdqa %xmm0, %xmm1
265 ; SSE2-NEXT: psrlw $1, %xmm1
266 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
267 ; SSE2-NEXT: psubb %xmm1, %xmm0
268 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
269 ; SSE2-NEXT: movdqa %xmm0, %xmm2
270 ; SSE2-NEXT: pand %xmm1, %xmm2
271 ; SSE2-NEXT: psrlw $2, %xmm0
272 ; SSE2-NEXT: pand %xmm1, %xmm0
273 ; SSE2-NEXT: paddb %xmm2, %xmm0
274 ; SSE2-NEXT: movdqa %xmm0, %xmm1
275 ; SSE2-NEXT: psrlw $4, %xmm1
276 ; SSE2-NEXT: paddb %xmm0, %xmm1
277 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
278 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
279 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm0
282 ; SSE3-LABEL: ult_3_v16i8:
284 ; SSE3-NEXT: movdqa %xmm0, %xmm1
285 ; SSE3-NEXT: psrlw $1, %xmm1
286 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
287 ; SSE3-NEXT: psubb %xmm1, %xmm0
288 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
289 ; SSE3-NEXT: movdqa %xmm0, %xmm2
290 ; SSE3-NEXT: pand %xmm1, %xmm2
291 ; SSE3-NEXT: psrlw $2, %xmm0
292 ; SSE3-NEXT: pand %xmm1, %xmm0
293 ; SSE3-NEXT: paddb %xmm2, %xmm0
294 ; SSE3-NEXT: movdqa %xmm0, %xmm1
295 ; SSE3-NEXT: psrlw $4, %xmm1
296 ; SSE3-NEXT: paddb %xmm0, %xmm1
297 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
298 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
299 ; SSE3-NEXT: pcmpgtb %xmm1, %xmm0
302 ; SSSE3-LABEL: ult_3_v16i8:
304 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
305 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
306 ; SSSE3-NEXT: pand %xmm1, %xmm2
307 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
308 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
309 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
310 ; SSSE3-NEXT: psrlw $4, %xmm0
311 ; SSSE3-NEXT: pand %xmm1, %xmm0
312 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
313 ; SSSE3-NEXT: paddb %xmm4, %xmm3
314 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
315 ; SSSE3-NEXT: pcmpgtb %xmm3, %xmm0
318 ; SSE41-LABEL: ult_3_v16i8:
320 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
321 ; SSE41-NEXT: movdqa %xmm0, %xmm2
322 ; SSE41-NEXT: pand %xmm1, %xmm2
323 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
324 ; SSE41-NEXT: movdqa %xmm3, %xmm4
325 ; SSE41-NEXT: pshufb %xmm2, %xmm4
326 ; SSE41-NEXT: psrlw $4, %xmm0
327 ; SSE41-NEXT: pand %xmm1, %xmm0
328 ; SSE41-NEXT: pshufb %xmm0, %xmm3
329 ; SSE41-NEXT: paddb %xmm4, %xmm3
330 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
331 ; SSE41-NEXT: pcmpgtb %xmm3, %xmm0
334 ; AVX1-LABEL: ult_3_v16i8:
336 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
337 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
338 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
339 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
340 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
341 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
342 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
343 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
344 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
345 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
348 ; AVX2-LABEL: ult_3_v16i8:
350 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
351 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
352 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
353 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
354 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
355 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
356 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
357 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
358 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
359 ; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
362 ; AVX512VPOPCNTDQ-LABEL: ult_3_v16i8:
363 ; AVX512VPOPCNTDQ: # %bb.0:
364 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
365 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
366 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
367 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastb {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
368 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
369 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
370 ; AVX512VPOPCNTDQ-NEXT: retq
372 ; AVX512VPOPCNTDQVL-LABEL: ult_3_v16i8:
373 ; AVX512VPOPCNTDQVL: # %bb.0:
374 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
375 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
376 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
377 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
378 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
379 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
380 ; AVX512VPOPCNTDQVL-NEXT: retq
382 ; BITALG_NOVLX-LABEL: ult_3_v16i8:
383 ; BITALG_NOVLX: # %bb.0:
384 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
385 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
386 ; BITALG_NOVLX-NEXT: vpbroadcastb {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
387 ; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
388 ; BITALG_NOVLX-NEXT: vzeroupper
389 ; BITALG_NOVLX-NEXT: retq
391 ; BITALG-LABEL: ult_3_v16i8:
393 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
394 ; BITALG-NEXT: vpbroadcastb {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
395 ; BITALG-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
397 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
398 %3 = icmp ult <16 x i8> %2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
399 %4 = sext <16 x i1> %3 to <16 x i8>
; ugt_3_v16i8: takes the per-byte population count of %0 (llvm.ctpop.v16i8),
; compares every count unsigned-greater-than 3, and sign-extends the <16 x i1>
; result to <16 x i8> (see the IR at the end of this function).
; Expected popcount lowering differs per configuration in the checks below:
;   - SSE2 / SSE3: a shift, mask, subtract and add sequence
;     (psrlw, pand, psubb, paddb).
;   - SSSE3 / SSE4.1 / AVX1 / AVX2: two pshufb lookups into the table
;     [0,1,1,2,...,4], indexed by the low nibble and by the nibble obtained
;     after a right shift by 4, with the two results added.
;   - AVX512VPOPCNTDQ (with and without VL): zero-extend bytes to dwords in a
;     zmm register, vpopcntd, then truncate back with vpmovdb.
;   - BITALG (with and without VL): a single vpopcntb.
; In every configuration the count is then compared with (v)pcmpgtb against a
; constant-pool operand.
; NOTE(review): the `ret` and the closing brace are not visible in this
; excerpt; presumably %4 is the returned value - confirm against the full file.
403 define <16 x i8> @ugt_3_v16i8(<16 x i8> %0) {
404 ; SSE2-LABEL: ugt_3_v16i8:
406 ; SSE2-NEXT: movdqa %xmm0, %xmm1
407 ; SSE2-NEXT: psrlw $1, %xmm1
408 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
409 ; SSE2-NEXT: psubb %xmm1, %xmm0
410 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
411 ; SSE2-NEXT: movdqa %xmm0, %xmm2
412 ; SSE2-NEXT: pand %xmm1, %xmm2
413 ; SSE2-NEXT: psrlw $2, %xmm0
414 ; SSE2-NEXT: pand %xmm1, %xmm0
415 ; SSE2-NEXT: paddb %xmm2, %xmm0
416 ; SSE2-NEXT: movdqa %xmm0, %xmm1
417 ; SSE2-NEXT: psrlw $4, %xmm1
418 ; SSE2-NEXT: paddb %xmm1, %xmm0
419 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
420 ; SSE2-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
423 ; SSE3-LABEL: ugt_3_v16i8:
425 ; SSE3-NEXT: movdqa %xmm0, %xmm1
426 ; SSE3-NEXT: psrlw $1, %xmm1
427 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
428 ; SSE3-NEXT: psubb %xmm1, %xmm0
429 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
430 ; SSE3-NEXT: movdqa %xmm0, %xmm2
431 ; SSE3-NEXT: pand %xmm1, %xmm2
432 ; SSE3-NEXT: psrlw $2, %xmm0
433 ; SSE3-NEXT: pand %xmm1, %xmm0
434 ; SSE3-NEXT: paddb %xmm2, %xmm0
435 ; SSE3-NEXT: movdqa %xmm0, %xmm1
436 ; SSE3-NEXT: psrlw $4, %xmm1
437 ; SSE3-NEXT: paddb %xmm1, %xmm0
438 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
439 ; SSE3-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
442 ; SSSE3-LABEL: ugt_3_v16i8:
444 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
445 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
446 ; SSSE3-NEXT: pand %xmm2, %xmm3
447 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
448 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
449 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
450 ; SSSE3-NEXT: psrlw $4, %xmm0
451 ; SSSE3-NEXT: pand %xmm2, %xmm0
452 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
453 ; SSSE3-NEXT: paddb %xmm4, %xmm1
454 ; SSSE3-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
455 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
458 ; SSE41-LABEL: ugt_3_v16i8:
460 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
461 ; SSE41-NEXT: movdqa %xmm0, %xmm3
462 ; SSE41-NEXT: pand %xmm2, %xmm3
463 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
464 ; SSE41-NEXT: movdqa %xmm1, %xmm4
465 ; SSE41-NEXT: pshufb %xmm3, %xmm4
466 ; SSE41-NEXT: psrlw $4, %xmm0
467 ; SSE41-NEXT: pand %xmm2, %xmm0
468 ; SSE41-NEXT: pshufb %xmm0, %xmm1
469 ; SSE41-NEXT: paddb %xmm4, %xmm1
470 ; SSE41-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
471 ; SSE41-NEXT: movdqa %xmm1, %xmm0
474 ; AVX1-LABEL: ugt_3_v16i8:
476 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
477 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
478 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
479 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
480 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
481 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
482 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
483 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
484 ; AVX1-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
487 ; AVX2-LABEL: ugt_3_v16i8:
489 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
490 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
491 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
492 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
493 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
494 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
495 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
496 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
497 ; AVX2-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
500 ; AVX512VPOPCNTDQ-LABEL: ugt_3_v16i8:
501 ; AVX512VPOPCNTDQ: # %bb.0:
502 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
503 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
504 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
505 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
506 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
507 ; AVX512VPOPCNTDQ-NEXT: retq
509 ; AVX512VPOPCNTDQVL-LABEL: ugt_3_v16i8:
510 ; AVX512VPOPCNTDQVL: # %bb.0:
511 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
512 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
513 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
514 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
515 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
516 ; AVX512VPOPCNTDQVL-NEXT: retq
518 ; BITALG_NOVLX-LABEL: ugt_3_v16i8:
519 ; BITALG_NOVLX: # %bb.0:
520 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
521 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
522 ; BITALG_NOVLX-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
523 ; BITALG_NOVLX-NEXT: vzeroupper
524 ; BITALG_NOVLX-NEXT: retq
526 ; BITALG-LABEL: ugt_3_v16i8:
528 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
529 ; BITALG-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
531 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
532 %3 = icmp ugt <16 x i8> %2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
533 %4 = sext <16 x i1> %3 to <16 x i8>
; ult_4_v16i8: takes the per-byte population count of %0 (llvm.ctpop.v16i8),
; compares every count unsigned-less-than 4, and sign-extends the <16 x i1>
; result to <16 x i8> (see the IR at the end of this function).
; Expected popcount lowering differs per configuration in the checks below:
;   - SSE2 / SSE3: a shift, mask, subtract and add sequence
;     (psrlw, pand, psubb, paddb).
;   - SSSE3 / SSE4.1 / AVX1 / AVX2: two pshufb lookups into the table
;     [0,1,1,2,...,4], indexed by the low nibble and by the nibble obtained
;     after a right shift by 4, with the two results added.
;   - AVX512VPOPCNTDQ (with and without VL): zero-extend bytes to dwords in a
;     zmm register, vpopcntd, then truncate back with vpmovdb.
;   - BITALG (with and without VL): a single vpopcntb.
; In every configuration a splat of 4 is then materialised in a register and
; (v)pcmpgtb is applied between that splat and the count.
; NOTE(review): the `ret` and the closing brace are not visible in this
; excerpt; presumably %4 is the returned value - confirm against the full file.
537 define <16 x i8> @ult_4_v16i8(<16 x i8> %0) {
538 ; SSE2-LABEL: ult_4_v16i8:
540 ; SSE2-NEXT: movdqa %xmm0, %xmm1
541 ; SSE2-NEXT: psrlw $1, %xmm1
542 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
543 ; SSE2-NEXT: psubb %xmm1, %xmm0
544 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
545 ; SSE2-NEXT: movdqa %xmm0, %xmm2
546 ; SSE2-NEXT: pand %xmm1, %xmm2
547 ; SSE2-NEXT: psrlw $2, %xmm0
548 ; SSE2-NEXT: pand %xmm1, %xmm0
549 ; SSE2-NEXT: paddb %xmm2, %xmm0
550 ; SSE2-NEXT: movdqa %xmm0, %xmm1
551 ; SSE2-NEXT: psrlw $4, %xmm1
552 ; SSE2-NEXT: paddb %xmm0, %xmm1
553 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
554 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
555 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm0
558 ; SSE3-LABEL: ult_4_v16i8:
560 ; SSE3-NEXT: movdqa %xmm0, %xmm1
561 ; SSE3-NEXT: psrlw $1, %xmm1
562 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
563 ; SSE3-NEXT: psubb %xmm1, %xmm0
564 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
565 ; SSE3-NEXT: movdqa %xmm0, %xmm2
566 ; SSE3-NEXT: pand %xmm1, %xmm2
567 ; SSE3-NEXT: psrlw $2, %xmm0
568 ; SSE3-NEXT: pand %xmm1, %xmm0
569 ; SSE3-NEXT: paddb %xmm2, %xmm0
570 ; SSE3-NEXT: movdqa %xmm0, %xmm1
571 ; SSE3-NEXT: psrlw $4, %xmm1
572 ; SSE3-NEXT: paddb %xmm0, %xmm1
573 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
574 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
575 ; SSE3-NEXT: pcmpgtb %xmm1, %xmm0
578 ; SSSE3-LABEL: ult_4_v16i8:
580 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
581 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
582 ; SSSE3-NEXT: pand %xmm1, %xmm2
583 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
584 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
585 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
586 ; SSSE3-NEXT: psrlw $4, %xmm0
587 ; SSSE3-NEXT: pand %xmm1, %xmm0
588 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
589 ; SSSE3-NEXT: paddb %xmm4, %xmm3
590 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
591 ; SSSE3-NEXT: pcmpgtb %xmm3, %xmm0
594 ; SSE41-LABEL: ult_4_v16i8:
596 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
597 ; SSE41-NEXT: movdqa %xmm0, %xmm2
598 ; SSE41-NEXT: pand %xmm1, %xmm2
599 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
600 ; SSE41-NEXT: movdqa %xmm3, %xmm4
601 ; SSE41-NEXT: pshufb %xmm2, %xmm4
602 ; SSE41-NEXT: psrlw $4, %xmm0
603 ; SSE41-NEXT: pand %xmm1, %xmm0
604 ; SSE41-NEXT: pshufb %xmm0, %xmm3
605 ; SSE41-NEXT: paddb %xmm4, %xmm3
606 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
607 ; SSE41-NEXT: pcmpgtb %xmm3, %xmm0
610 ; AVX1-LABEL: ult_4_v16i8:
612 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
613 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
614 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
615 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
616 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
617 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
618 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
619 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
620 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
621 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
624 ; AVX2-LABEL: ult_4_v16i8:
626 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
627 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
628 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
629 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
630 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
631 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
632 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
633 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
634 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
635 ; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
638 ; AVX512VPOPCNTDQ-LABEL: ult_4_v16i8:
639 ; AVX512VPOPCNTDQ: # %bb.0:
640 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
641 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
642 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
643 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastb {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
644 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
645 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
646 ; AVX512VPOPCNTDQ-NEXT: retq
648 ; AVX512VPOPCNTDQVL-LABEL: ult_4_v16i8:
649 ; AVX512VPOPCNTDQVL: # %bb.0:
650 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
651 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
652 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
653 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
654 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
655 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
656 ; AVX512VPOPCNTDQVL-NEXT: retq
658 ; BITALG_NOVLX-LABEL: ult_4_v16i8:
659 ; BITALG_NOVLX: # %bb.0:
660 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
661 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
662 ; BITALG_NOVLX-NEXT: vpbroadcastb {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
663 ; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
664 ; BITALG_NOVLX-NEXT: vzeroupper
665 ; BITALG_NOVLX-NEXT: retq
667 ; BITALG-LABEL: ult_4_v16i8:
669 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
670 ; BITALG-NEXT: vpbroadcastb {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
671 ; BITALG-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
673 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
674 %3 = icmp ult <16 x i8> %2, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
675 %4 = sext <16 x i1> %3 to <16 x i8>
679 define <16 x i8> @ugt_4_v16i8(<16 x i8> %0) {
680 ; SSE2-LABEL: ugt_4_v16i8:
682 ; SSE2-NEXT: movdqa %xmm0, %xmm1
683 ; SSE2-NEXT: psrlw $1, %xmm1
684 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
685 ; SSE2-NEXT: psubb %xmm1, %xmm0
686 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
687 ; SSE2-NEXT: movdqa %xmm0, %xmm2
688 ; SSE2-NEXT: pand %xmm1, %xmm2
689 ; SSE2-NEXT: psrlw $2, %xmm0
690 ; SSE2-NEXT: pand %xmm1, %xmm0
691 ; SSE2-NEXT: paddb %xmm2, %xmm0
692 ; SSE2-NEXT: movdqa %xmm0, %xmm1
693 ; SSE2-NEXT: psrlw $4, %xmm1
694 ; SSE2-NEXT: paddb %xmm1, %xmm0
695 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
696 ; SSE2-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
699 ; SSE3-LABEL: ugt_4_v16i8:
701 ; SSE3-NEXT: movdqa %xmm0, %xmm1
702 ; SSE3-NEXT: psrlw $1, %xmm1
703 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
704 ; SSE3-NEXT: psubb %xmm1, %xmm0
705 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
706 ; SSE3-NEXT: movdqa %xmm0, %xmm2
707 ; SSE3-NEXT: pand %xmm1, %xmm2
708 ; SSE3-NEXT: psrlw $2, %xmm0
709 ; SSE3-NEXT: pand %xmm1, %xmm0
710 ; SSE3-NEXT: paddb %xmm2, %xmm0
711 ; SSE3-NEXT: movdqa %xmm0, %xmm1
712 ; SSE3-NEXT: psrlw $4, %xmm1
713 ; SSE3-NEXT: paddb %xmm1, %xmm0
714 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
715 ; SSE3-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
718 ; SSSE3-LABEL: ugt_4_v16i8:
720 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
721 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
722 ; SSSE3-NEXT: pand %xmm2, %xmm3
723 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
724 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
725 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
726 ; SSSE3-NEXT: psrlw $4, %xmm0
727 ; SSSE3-NEXT: pand %xmm2, %xmm0
728 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
729 ; SSSE3-NEXT: paddb %xmm4, %xmm1
730 ; SSSE3-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
731 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
734 ; SSE41-LABEL: ugt_4_v16i8:
736 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
737 ; SSE41-NEXT: movdqa %xmm0, %xmm3
738 ; SSE41-NEXT: pand %xmm2, %xmm3
739 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
740 ; SSE41-NEXT: movdqa %xmm1, %xmm4
741 ; SSE41-NEXT: pshufb %xmm3, %xmm4
742 ; SSE41-NEXT: psrlw $4, %xmm0
743 ; SSE41-NEXT: pand %xmm2, %xmm0
744 ; SSE41-NEXT: pshufb %xmm0, %xmm1
745 ; SSE41-NEXT: paddb %xmm4, %xmm1
746 ; SSE41-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
747 ; SSE41-NEXT: movdqa %xmm1, %xmm0
750 ; AVX1-LABEL: ugt_4_v16i8:
752 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
753 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
754 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
755 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
756 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
757 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
758 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
759 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
760 ; AVX1-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
763 ; AVX2-LABEL: ugt_4_v16i8:
765 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
766 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
767 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
768 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
769 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
770 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
771 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
772 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
773 ; AVX2-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
776 ; AVX512VPOPCNTDQ-LABEL: ugt_4_v16i8:
777 ; AVX512VPOPCNTDQ: # %bb.0:
778 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
779 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
780 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
781 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
782 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
783 ; AVX512VPOPCNTDQ-NEXT: retq
785 ; AVX512VPOPCNTDQVL-LABEL: ugt_4_v16i8:
786 ; AVX512VPOPCNTDQVL: # %bb.0:
787 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
788 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
789 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
790 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
791 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
792 ; AVX512VPOPCNTDQVL-NEXT: retq
794 ; BITALG_NOVLX-LABEL: ugt_4_v16i8:
795 ; BITALG_NOVLX: # %bb.0:
796 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
797 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
798 ; BITALG_NOVLX-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
799 ; BITALG_NOVLX-NEXT: vzeroupper
800 ; BITALG_NOVLX-NEXT: retq
802 ; BITALG-LABEL: ugt_4_v16i8:
804 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
805 ; BITALG-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
807 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
808 %3 = icmp ugt <16 x i8> %2, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
809 %4 = sext <16 x i1> %3 to <16 x i8>
; ult_5_v16i8: codegen test for the per-lane predicate "ctpop(x) u< 5" on a
; <16 x i8> vector. The IR at the bottom takes llvm.ctpop.v16i8 of %0, compares
; every lane unsigned-less-than a splat of 5, and sign-extends the <16 x i1>
; mask to <16 x i8> (%4; presumably the value returned, given the return type).
;
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; What they currently expect, per RUN prefix:
;   SSE2/SSE3          - shift/mask/add popcount sequence, then pcmpgtb with a
;                        [5 x 16] splat as the left-hand operand.
;   SSSE3/SSE41/AVX1/2 - two pshufb lookups into the 16-entry table
;                        [0,1,1,2,...,4] (low and high nibble), summed with
;                        paddb, then pcmpgtb against the [5 x 16] splat.
;   AVX512VPOPCNTDQ(VL)- zero-extend bytes to dwords in zmm0, vpopcntd,
;                        truncate back with vpmovdb, then pcmpgtb.
;   BITALG(_NOVLX)     - vpopcntb directly (on zmm0 without VL), then pcmpgtb.
; Note: the AVX512VPOPCNTDQVL run expects the splat to be materialized with
; vpbroadcastd, whereas the other AVX2-and-later runs expect vpbroadcastb.
813 define <16 x i8> @ult_5_v16i8(<16 x i8> %0) {
814 ; SSE2-LABEL: ult_5_v16i8:
816 ; SSE2-NEXT: movdqa %xmm0, %xmm1
817 ; SSE2-NEXT: psrlw $1, %xmm1
818 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
819 ; SSE2-NEXT: psubb %xmm1, %xmm0
820 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
821 ; SSE2-NEXT: movdqa %xmm0, %xmm2
822 ; SSE2-NEXT: pand %xmm1, %xmm2
823 ; SSE2-NEXT: psrlw $2, %xmm0
824 ; SSE2-NEXT: pand %xmm1, %xmm0
825 ; SSE2-NEXT: paddb %xmm2, %xmm0
826 ; SSE2-NEXT: movdqa %xmm0, %xmm1
827 ; SSE2-NEXT: psrlw $4, %xmm1
828 ; SSE2-NEXT: paddb %xmm0, %xmm1
829 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
830 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
831 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm0
834 ; SSE3-LABEL: ult_5_v16i8:
836 ; SSE3-NEXT: movdqa %xmm0, %xmm1
837 ; SSE3-NEXT: psrlw $1, %xmm1
838 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
839 ; SSE3-NEXT: psubb %xmm1, %xmm0
840 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
841 ; SSE3-NEXT: movdqa %xmm0, %xmm2
842 ; SSE3-NEXT: pand %xmm1, %xmm2
843 ; SSE3-NEXT: psrlw $2, %xmm0
844 ; SSE3-NEXT: pand %xmm1, %xmm0
845 ; SSE3-NEXT: paddb %xmm2, %xmm0
846 ; SSE3-NEXT: movdqa %xmm0, %xmm1
847 ; SSE3-NEXT: psrlw $4, %xmm1
848 ; SSE3-NEXT: paddb %xmm0, %xmm1
849 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
850 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
851 ; SSE3-NEXT: pcmpgtb %xmm1, %xmm0
854 ; SSSE3-LABEL: ult_5_v16i8:
856 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
857 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
858 ; SSSE3-NEXT: pand %xmm1, %xmm2
859 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
860 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
861 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
862 ; SSSE3-NEXT: psrlw $4, %xmm0
863 ; SSSE3-NEXT: pand %xmm1, %xmm0
864 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
865 ; SSSE3-NEXT: paddb %xmm4, %xmm3
866 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
867 ; SSSE3-NEXT: pcmpgtb %xmm3, %xmm0
870 ; SSE41-LABEL: ult_5_v16i8:
872 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
873 ; SSE41-NEXT: movdqa %xmm0, %xmm2
874 ; SSE41-NEXT: pand %xmm1, %xmm2
875 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
876 ; SSE41-NEXT: movdqa %xmm3, %xmm4
877 ; SSE41-NEXT: pshufb %xmm2, %xmm4
878 ; SSE41-NEXT: psrlw $4, %xmm0
879 ; SSE41-NEXT: pand %xmm1, %xmm0
880 ; SSE41-NEXT: pshufb %xmm0, %xmm3
881 ; SSE41-NEXT: paddb %xmm4, %xmm3
882 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
883 ; SSE41-NEXT: pcmpgtb %xmm3, %xmm0
886 ; AVX1-LABEL: ult_5_v16i8:
888 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
889 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
890 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
891 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
892 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
893 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
894 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
895 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
896 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
897 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
900 ; AVX2-LABEL: ult_5_v16i8:
902 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
903 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
904 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
905 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
906 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
907 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
908 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
909 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
910 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
911 ; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
914 ; AVX512VPOPCNTDQ-LABEL: ult_5_v16i8:
915 ; AVX512VPOPCNTDQ: # %bb.0:
916 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
917 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
918 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
919 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastb {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
920 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
921 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
922 ; AVX512VPOPCNTDQ-NEXT: retq
924 ; AVX512VPOPCNTDQVL-LABEL: ult_5_v16i8:
925 ; AVX512VPOPCNTDQVL: # %bb.0:
926 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
927 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
928 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
929 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
930 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
931 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
932 ; AVX512VPOPCNTDQVL-NEXT: retq
934 ; BITALG_NOVLX-LABEL: ult_5_v16i8:
935 ; BITALG_NOVLX: # %bb.0:
936 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
937 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
938 ; BITALG_NOVLX-NEXT: vpbroadcastb {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
939 ; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
940 ; BITALG_NOVLX-NEXT: vzeroupper
941 ; BITALG_NOVLX-NEXT: retq
943 ; BITALG-LABEL: ult_5_v16i8:
945 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
946 ; BITALG-NEXT: vpbroadcastb {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
947 ; BITALG-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; Test input: per-byte popcount -> unsigned "< 5" lane mask -> sext to i8 lanes.
949 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
950 %3 = icmp ult <16 x i8> %2, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
951 %4 = sext <16 x i1> %3 to <16 x i8>
; ugt_5_v16i8: codegen test for the per-lane predicate "ctpop(x) u> 5" on a
; <16 x i8> vector. The IR at the bottom takes llvm.ctpop.v16i8 of %0, compares
; every lane unsigned-greater-than a splat of 5, and sign-extends the
; <16 x i1> mask to <16 x i8> (%4; presumably the value returned, given the
; return type).
;
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; What they currently expect, per RUN prefix:
;   SSE2/SSE3          - shift/mask/add popcount sequence, then pcmpgtb with a
;                        constant-pool memory operand.
;   SSSE3/SSE41        - two pshufb lookups into the 16-entry table
;                        [0,1,1,2,...,4] summed with paddb, pcmpgtb against a
;                        constant-pool operand, result copied xmm1 -> xmm0.
;   AVX1/AVX2          - same table lookup, vpcmpgtb against constant pool.
;   AVX512VPOPCNTDQ(VL)- zero-extend bytes to dwords in zmm0, vpopcntd,
;                        truncate back with vpmovdb, then vpcmpgtb.
;   BITALG(_NOVLX)     - vpopcntb directly (on zmm0 without VL), then vpcmpgtb.
; The comparison constant is matched only through the LCPI label regex, so its
; lane values are not pinned by these checks.
955 define <16 x i8> @ugt_5_v16i8(<16 x i8> %0) {
956 ; SSE2-LABEL: ugt_5_v16i8:
958 ; SSE2-NEXT: movdqa %xmm0, %xmm1
959 ; SSE2-NEXT: psrlw $1, %xmm1
960 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
961 ; SSE2-NEXT: psubb %xmm1, %xmm0
962 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
963 ; SSE2-NEXT: movdqa %xmm0, %xmm2
964 ; SSE2-NEXT: pand %xmm1, %xmm2
965 ; SSE2-NEXT: psrlw $2, %xmm0
966 ; SSE2-NEXT: pand %xmm1, %xmm0
967 ; SSE2-NEXT: paddb %xmm2, %xmm0
968 ; SSE2-NEXT: movdqa %xmm0, %xmm1
969 ; SSE2-NEXT: psrlw $4, %xmm1
970 ; SSE2-NEXT: paddb %xmm1, %xmm0
971 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
972 ; SSE2-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
975 ; SSE3-LABEL: ugt_5_v16i8:
977 ; SSE3-NEXT: movdqa %xmm0, %xmm1
978 ; SSE3-NEXT: psrlw $1, %xmm1
979 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
980 ; SSE3-NEXT: psubb %xmm1, %xmm0
981 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
982 ; SSE3-NEXT: movdqa %xmm0, %xmm2
983 ; SSE3-NEXT: pand %xmm1, %xmm2
984 ; SSE3-NEXT: psrlw $2, %xmm0
985 ; SSE3-NEXT: pand %xmm1, %xmm0
986 ; SSE3-NEXT: paddb %xmm2, %xmm0
987 ; SSE3-NEXT: movdqa %xmm0, %xmm1
988 ; SSE3-NEXT: psrlw $4, %xmm1
989 ; SSE3-NEXT: paddb %xmm1, %xmm0
990 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
991 ; SSE3-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
994 ; SSSE3-LABEL: ugt_5_v16i8:
996 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
997 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
998 ; SSSE3-NEXT: pand %xmm2, %xmm3
999 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1000 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
1001 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
1002 ; SSSE3-NEXT: psrlw $4, %xmm0
1003 ; SSSE3-NEXT: pand %xmm2, %xmm0
1004 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
1005 ; SSSE3-NEXT: paddb %xmm4, %xmm1
1006 ; SSSE3-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1007 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1010 ; SSE41-LABEL: ugt_5_v16i8:
1012 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1013 ; SSE41-NEXT: movdqa %xmm0, %xmm3
1014 ; SSE41-NEXT: pand %xmm2, %xmm3
1015 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1016 ; SSE41-NEXT: movdqa %xmm1, %xmm4
1017 ; SSE41-NEXT: pshufb %xmm3, %xmm4
1018 ; SSE41-NEXT: psrlw $4, %xmm0
1019 ; SSE41-NEXT: pand %xmm2, %xmm0
1020 ; SSE41-NEXT: pshufb %xmm0, %xmm1
1021 ; SSE41-NEXT: paddb %xmm4, %xmm1
1022 ; SSE41-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1023 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1026 ; AVX1-LABEL: ugt_5_v16i8:
1028 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1029 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
1030 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1031 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1032 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1033 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1034 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1035 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1036 ; AVX1-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1039 ; AVX2-LABEL: ugt_5_v16i8:
1041 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1042 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
1043 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1044 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1045 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
1046 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1047 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1048 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1049 ; AVX2-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1052 ; AVX512VPOPCNTDQ-LABEL: ugt_5_v16i8:
1053 ; AVX512VPOPCNTDQ: # %bb.0:
1054 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1055 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
1056 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
1057 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1058 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
1059 ; AVX512VPOPCNTDQ-NEXT: retq
1061 ; AVX512VPOPCNTDQVL-LABEL: ugt_5_v16i8:
1062 ; AVX512VPOPCNTDQVL: # %bb.0:
1063 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1064 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
1065 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
1066 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1067 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
1068 ; AVX512VPOPCNTDQVL-NEXT: retq
1070 ; BITALG_NOVLX-LABEL: ugt_5_v16i8:
1071 ; BITALG_NOVLX: # %bb.0:
1072 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1073 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
1074 ; BITALG_NOVLX-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1075 ; BITALG_NOVLX-NEXT: vzeroupper
1076 ; BITALG_NOVLX-NEXT: retq
1078 ; BITALG-LABEL: ugt_5_v16i8:
1080 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
1081 ; BITALG-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; Test input: per-byte popcount -> unsigned "> 5" lane mask -> sext to i8 lanes.
1083 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
1084 %3 = icmp ugt <16 x i8> %2, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
1085 %4 = sext <16 x i1> %3 to <16 x i8>
; ult_6_v16i8: codegen test for the per-lane predicate "ctpop(x) u< 6" on a
; <16 x i8> vector. The IR at the bottom takes llvm.ctpop.v16i8 of %0, compares
; every lane unsigned-less-than a splat of 6, and sign-extends the <16 x i1>
; mask to <16 x i8> (%4; presumably the value returned, given the return type).
;
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; What they currently expect, per RUN prefix:
;   SSE2/SSE3          - shift/mask/add popcount sequence, then pcmpgtb with a
;                        [6 x 16] splat as the left-hand operand.
;   SSSE3/SSE41/AVX1/2 - two pshufb lookups into the 16-entry table
;                        [0,1,1,2,...,4] (low and high nibble), summed with
;                        paddb, then pcmpgtb against the [6 x 16] splat.
;   AVX512VPOPCNTDQ(VL)- zero-extend bytes to dwords in zmm0, vpopcntd,
;                        truncate back with vpmovdb, then pcmpgtb.
;   BITALG(_NOVLX)     - vpopcntb directly (on zmm0 without VL), then pcmpgtb.
; Note: the AVX512VPOPCNTDQVL run expects the splat to be materialized with
; vpbroadcastd, whereas the other AVX2-and-later runs expect vpbroadcastb.
1089 define <16 x i8> @ult_6_v16i8(<16 x i8> %0) {
1090 ; SSE2-LABEL: ult_6_v16i8:
1092 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1093 ; SSE2-NEXT: psrlw $1, %xmm1
1094 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1095 ; SSE2-NEXT: psubb %xmm1, %xmm0
1096 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1097 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1098 ; SSE2-NEXT: pand %xmm1, %xmm2
1099 ; SSE2-NEXT: psrlw $2, %xmm0
1100 ; SSE2-NEXT: pand %xmm1, %xmm0
1101 ; SSE2-NEXT: paddb %xmm2, %xmm0
1102 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1103 ; SSE2-NEXT: psrlw $4, %xmm1
1104 ; SSE2-NEXT: paddb %xmm0, %xmm1
1105 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1106 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1107 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm0
1110 ; SSE3-LABEL: ult_6_v16i8:
1112 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1113 ; SSE3-NEXT: psrlw $1, %xmm1
1114 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1115 ; SSE3-NEXT: psubb %xmm1, %xmm0
1116 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1117 ; SSE3-NEXT: movdqa %xmm0, %xmm2
1118 ; SSE3-NEXT: pand %xmm1, %xmm2
1119 ; SSE3-NEXT: psrlw $2, %xmm0
1120 ; SSE3-NEXT: pand %xmm1, %xmm0
1121 ; SSE3-NEXT: paddb %xmm2, %xmm0
1122 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1123 ; SSE3-NEXT: psrlw $4, %xmm1
1124 ; SSE3-NEXT: paddb %xmm0, %xmm1
1125 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1126 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1127 ; SSE3-NEXT: pcmpgtb %xmm1, %xmm0
1130 ; SSSE3-LABEL: ult_6_v16i8:
1132 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1133 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
1134 ; SSSE3-NEXT: pand %xmm1, %xmm2
1135 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1136 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
1137 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
1138 ; SSSE3-NEXT: psrlw $4, %xmm0
1139 ; SSSE3-NEXT: pand %xmm1, %xmm0
1140 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
1141 ; SSSE3-NEXT: paddb %xmm4, %xmm3
1142 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1143 ; SSSE3-NEXT: pcmpgtb %xmm3, %xmm0
1146 ; SSE41-LABEL: ult_6_v16i8:
1148 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1149 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1150 ; SSE41-NEXT: pand %xmm1, %xmm2
1151 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1152 ; SSE41-NEXT: movdqa %xmm3, %xmm4
1153 ; SSE41-NEXT: pshufb %xmm2, %xmm4
1154 ; SSE41-NEXT: psrlw $4, %xmm0
1155 ; SSE41-NEXT: pand %xmm1, %xmm0
1156 ; SSE41-NEXT: pshufb %xmm0, %xmm3
1157 ; SSE41-NEXT: paddb %xmm4, %xmm3
1158 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1159 ; SSE41-NEXT: pcmpgtb %xmm3, %xmm0
1162 ; AVX1-LABEL: ult_6_v16i8:
1164 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1165 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
1166 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1167 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1168 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1169 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1170 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1171 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1172 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1173 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1176 ; AVX2-LABEL: ult_6_v16i8:
1178 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1179 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
1180 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1181 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1182 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
1183 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1184 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1185 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1186 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1187 ; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1190 ; AVX512VPOPCNTDQ-LABEL: ult_6_v16i8:
1191 ; AVX512VPOPCNTDQ: # %bb.0:
1192 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1193 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
1194 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
1195 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastb {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1196 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1197 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
1198 ; AVX512VPOPCNTDQ-NEXT: retq
1200 ; AVX512VPOPCNTDQVL-LABEL: ult_6_v16i8:
1201 ; AVX512VPOPCNTDQVL: # %bb.0:
1202 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1203 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
1204 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
1205 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1206 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1207 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
1208 ; AVX512VPOPCNTDQVL-NEXT: retq
1210 ; BITALG_NOVLX-LABEL: ult_6_v16i8:
1211 ; BITALG_NOVLX: # %bb.0:
1212 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1213 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
1214 ; BITALG_NOVLX-NEXT: vpbroadcastb {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1215 ; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1216 ; BITALG_NOVLX-NEXT: vzeroupper
1217 ; BITALG_NOVLX-NEXT: retq
1219 ; BITALG-LABEL: ult_6_v16i8:
1221 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
1222 ; BITALG-NEXT: vpbroadcastb {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
1223 ; BITALG-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; Test input: per-byte popcount -> unsigned "< 6" lane mask -> sext to i8 lanes.
1225 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
1226 %3 = icmp ult <16 x i8> %2, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
1227 %4 = sext <16 x i1> %3 to <16 x i8>
; ugt_6_v16i8: codegen test for the per-lane predicate "ctpop(x) u> 6" on a
; <16 x i8> vector. The IR at the bottom takes llvm.ctpop.v16i8 of %0, compares
; every lane unsigned-greater-than a splat of 6, and sign-extends the
; <16 x i1> mask to <16 x i8> (%4; presumably the value returned, given the
; return type).
;
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; What they currently expect, per RUN prefix:
;   SSE2/SSE3          - shift/mask/add popcount sequence, then pcmpgtb with a
;                        constant-pool memory operand.
;   SSSE3/SSE41        - two pshufb lookups into the 16-entry table
;                        [0,1,1,2,...,4] summed with paddb, pcmpgtb against a
;                        constant-pool operand, result copied xmm1 -> xmm0.
;   AVX1/AVX2          - same table lookup, vpcmpgtb against constant pool.
;   AVX512VPOPCNTDQ(VL)- zero-extend bytes to dwords in zmm0, vpopcntd,
;                        truncate back with vpmovdb, then vpcmpgtb.
;   BITALG(_NOVLX)     - vpopcntb directly (on zmm0 without VL), then vpcmpgtb.
; The comparison constant is matched only through the LCPI label regex, so its
; lane values are not pinned by these checks.
1231 define <16 x i8> @ugt_6_v16i8(<16 x i8> %0) {
1232 ; SSE2-LABEL: ugt_6_v16i8:
1234 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1235 ; SSE2-NEXT: psrlw $1, %xmm1
1236 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1237 ; SSE2-NEXT: psubb %xmm1, %xmm0
1238 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1239 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1240 ; SSE2-NEXT: pand %xmm1, %xmm2
1241 ; SSE2-NEXT: psrlw $2, %xmm0
1242 ; SSE2-NEXT: pand %xmm1, %xmm0
1243 ; SSE2-NEXT: paddb %xmm2, %xmm0
1244 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1245 ; SSE2-NEXT: psrlw $4, %xmm1
1246 ; SSE2-NEXT: paddb %xmm1, %xmm0
1247 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1248 ; SSE2-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1251 ; SSE3-LABEL: ugt_6_v16i8:
1253 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1254 ; SSE3-NEXT: psrlw $1, %xmm1
1255 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1256 ; SSE3-NEXT: psubb %xmm1, %xmm0
1257 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1258 ; SSE3-NEXT: movdqa %xmm0, %xmm2
1259 ; SSE3-NEXT: pand %xmm1, %xmm2
1260 ; SSE3-NEXT: psrlw $2, %xmm0
1261 ; SSE3-NEXT: pand %xmm1, %xmm0
1262 ; SSE3-NEXT: paddb %xmm2, %xmm0
1263 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1264 ; SSE3-NEXT: psrlw $4, %xmm1
1265 ; SSE3-NEXT: paddb %xmm1, %xmm0
1266 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1267 ; SSE3-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1270 ; SSSE3-LABEL: ugt_6_v16i8:
1272 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1273 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
1274 ; SSSE3-NEXT: pand %xmm2, %xmm3
1275 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1276 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
1277 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
1278 ; SSSE3-NEXT: psrlw $4, %xmm0
1279 ; SSSE3-NEXT: pand %xmm2, %xmm0
1280 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
1281 ; SSSE3-NEXT: paddb %xmm4, %xmm1
1282 ; SSSE3-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1283 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1286 ; SSE41-LABEL: ugt_6_v16i8:
1288 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1289 ; SSE41-NEXT: movdqa %xmm0, %xmm3
1290 ; SSE41-NEXT: pand %xmm2, %xmm3
1291 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1292 ; SSE41-NEXT: movdqa %xmm1, %xmm4
1293 ; SSE41-NEXT: pshufb %xmm3, %xmm4
1294 ; SSE41-NEXT: psrlw $4, %xmm0
1295 ; SSE41-NEXT: pand %xmm2, %xmm0
1296 ; SSE41-NEXT: pshufb %xmm0, %xmm1
1297 ; SSE41-NEXT: paddb %xmm4, %xmm1
1298 ; SSE41-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1299 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1302 ; AVX1-LABEL: ugt_6_v16i8:
1304 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1305 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
1306 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1307 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1308 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1309 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1310 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1311 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1312 ; AVX1-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1315 ; AVX2-LABEL: ugt_6_v16i8:
1317 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1318 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
1319 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1320 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1321 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
1322 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1323 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1324 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1325 ; AVX2-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1328 ; AVX512VPOPCNTDQ-LABEL: ugt_6_v16i8:
1329 ; AVX512VPOPCNTDQ: # %bb.0:
1330 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1331 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
1332 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
1333 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1334 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
1335 ; AVX512VPOPCNTDQ-NEXT: retq
1337 ; AVX512VPOPCNTDQVL-LABEL: ugt_6_v16i8:
1338 ; AVX512VPOPCNTDQVL: # %bb.0:
1339 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1340 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
1341 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
1342 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1343 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
1344 ; AVX512VPOPCNTDQVL-NEXT: retq
1346 ; BITALG_NOVLX-LABEL: ugt_6_v16i8:
1347 ; BITALG_NOVLX: # %bb.0:
1348 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1349 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
1350 ; BITALG_NOVLX-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1351 ; BITALG_NOVLX-NEXT: vzeroupper
1352 ; BITALG_NOVLX-NEXT: retq
1354 ; BITALG-LABEL: ugt_6_v16i8:
1356 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
1357 ; BITALG-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; Test input: per-byte popcount -> unsigned "> 6" lane mask -> sext to i8 lanes.
1359 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
1360 %3 = icmp ugt <16 x i8> %2, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
1361 %4 = sext <16 x i1> %3 to <16 x i8>
1365 define <16 x i8> @ult_7_v16i8(<16 x i8> %0) {
1366 ; SSE2-LABEL: ult_7_v16i8:
1368 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1369 ; SSE2-NEXT: psrlw $1, %xmm1
1370 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1371 ; SSE2-NEXT: psubb %xmm1, %xmm0
1372 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1373 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1374 ; SSE2-NEXT: pand %xmm1, %xmm2
1375 ; SSE2-NEXT: psrlw $2, %xmm0
1376 ; SSE2-NEXT: pand %xmm1, %xmm0
1377 ; SSE2-NEXT: paddb %xmm2, %xmm0
1378 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1379 ; SSE2-NEXT: psrlw $4, %xmm1
1380 ; SSE2-NEXT: paddb %xmm0, %xmm1
1381 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1382 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1383 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm0
1386 ; SSE3-LABEL: ult_7_v16i8:
1388 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1389 ; SSE3-NEXT: psrlw $1, %xmm1
1390 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1391 ; SSE3-NEXT: psubb %xmm1, %xmm0
1392 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1393 ; SSE3-NEXT: movdqa %xmm0, %xmm2
1394 ; SSE3-NEXT: pand %xmm1, %xmm2
1395 ; SSE3-NEXT: psrlw $2, %xmm0
1396 ; SSE3-NEXT: pand %xmm1, %xmm0
1397 ; SSE3-NEXT: paddb %xmm2, %xmm0
1398 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1399 ; SSE3-NEXT: psrlw $4, %xmm1
1400 ; SSE3-NEXT: paddb %xmm0, %xmm1
1401 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1402 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1403 ; SSE3-NEXT: pcmpgtb %xmm1, %xmm0
1406 ; SSSE3-LABEL: ult_7_v16i8:
1408 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1409 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
1410 ; SSSE3-NEXT: pand %xmm1, %xmm2
1411 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1412 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
1413 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
1414 ; SSSE3-NEXT: psrlw $4, %xmm0
1415 ; SSSE3-NEXT: pand %xmm1, %xmm0
1416 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
1417 ; SSSE3-NEXT: paddb %xmm4, %xmm3
1418 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1419 ; SSSE3-NEXT: pcmpgtb %xmm3, %xmm0
1422 ; SSE41-LABEL: ult_7_v16i8:
1424 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1425 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1426 ; SSE41-NEXT: pand %xmm1, %xmm2
1427 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1428 ; SSE41-NEXT: movdqa %xmm3, %xmm4
1429 ; SSE41-NEXT: pshufb %xmm2, %xmm4
1430 ; SSE41-NEXT: psrlw $4, %xmm0
1431 ; SSE41-NEXT: pand %xmm1, %xmm0
1432 ; SSE41-NEXT: pshufb %xmm0, %xmm3
1433 ; SSE41-NEXT: paddb %xmm4, %xmm3
1434 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1435 ; SSE41-NEXT: pcmpgtb %xmm3, %xmm0
1438 ; AVX1-LABEL: ult_7_v16i8:
1440 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1441 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
1442 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1443 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1444 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1445 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1446 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1447 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1448 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1449 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1452 ; AVX2-LABEL: ult_7_v16i8:
1454 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1455 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
1456 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1457 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1458 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
1459 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1460 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1461 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1462 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1463 ; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1466 ; AVX512VPOPCNTDQ-LABEL: ult_7_v16i8:
1467 ; AVX512VPOPCNTDQ: # %bb.0:
1468 ; AVX512VPOPCNTDQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1469 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
1470 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
1471 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastb {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1472 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1473 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
1474 ; AVX512VPOPCNTDQ-NEXT: retq
1476 ; AVX512VPOPCNTDQVL-LABEL: ult_7_v16i8:
1477 ; AVX512VPOPCNTDQVL: # %bb.0:
1478 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1479 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
1480 ; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
1481 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1482 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1483 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
1484 ; AVX512VPOPCNTDQVL-NEXT: retq
1486 ; BITALG_NOVLX-LABEL: ult_7_v16i8:
1487 ; BITALG_NOVLX: # %bb.0:
1488 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1489 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
1490 ; BITALG_NOVLX-NEXT: vpbroadcastb {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1491 ; BITALG_NOVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1492 ; BITALG_NOVLX-NEXT: vzeroupper
1493 ; BITALG_NOVLX-NEXT: retq
1495 ; BITALG-LABEL: ult_7_v16i8:
1497 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
1498 ; BITALG-NEXT: vpbroadcastb {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1499 ; BITALG-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
1501 %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0)
1502 %3 = icmp ult <16 x i8> %2, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
1503 %4 = sext <16 x i1> %3 to <16 x i8>
; ugt_1_v8i16 - per-lane test "ctpop(x) > 1" on <8 x i16>.
; The IR takes the population count of %0, compares it unsigned-greater-than
; against a splat of 1, and sign-extends the <8 x i1> result to <8 x i16>.
; Expected lowering, as recorded by the autogenerated checks below,
; - The SSE, AVX1, AVX2 and both AVX512VPOPCNTDQ prefixes expect no popcount
;   instruction at all. They form x & (x + all-ones), compare that with zero
;   and then invert the compare result (pxor/vpxor with the all-ones register,
;   or vpternlogq $15 on the AVX512VPOPCNTDQ prefixes).
; - BITALG_NOVLX and BITALG expect vpopcntw followed by vpcmpgtw against a
;   constant-pool operand.
1507 define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
1508 ; SSE-LABEL: ugt_1_v8i16:
1510 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
1511 ; SSE-NEXT: movdqa %xmm0, %xmm2
1512 ; SSE-NEXT: paddw %xmm1, %xmm2
1513 ; SSE-NEXT: pand %xmm2, %xmm0
1514 ; SSE-NEXT: pxor %xmm2, %xmm2
1515 ; SSE-NEXT: pcmpeqw %xmm2, %xmm0
1516 ; SSE-NEXT: pxor %xmm1, %xmm0
1519 ; AVX1-LABEL: ugt_1_v8i16:
1521 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1522 ; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm2
1523 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
1524 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1525 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
1526 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
1529 ; AVX2-LABEL: ugt_1_v8i16:
1531 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1532 ; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm2
1533 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
1534 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1535 ; AVX2-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
1536 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
1539 ; AVX512VPOPCNTDQ-LABEL: ugt_1_v8i16:
1540 ; AVX512VPOPCNTDQ: # %bb.0:
1541 ; AVX512VPOPCNTDQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1542 ; AVX512VPOPCNTDQ-NEXT: vpaddw %xmm1, %xmm0, %xmm1
1543 ; AVX512VPOPCNTDQ-NEXT: vpand %xmm1, %xmm0, %xmm0
1544 ; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1545 ; AVX512VPOPCNTDQ-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1546 ; AVX512VPOPCNTDQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
1547 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
1548 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
1549 ; AVX512VPOPCNTDQ-NEXT: retq
1551 ; AVX512VPOPCNTDQVL-LABEL: ugt_1_v8i16:
1552 ; AVX512VPOPCNTDQVL: # %bb.0:
1553 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1554 ; AVX512VPOPCNTDQVL-NEXT: vpaddw %xmm1, %xmm0, %xmm1
1555 ; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
1556 ; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1557 ; AVX512VPOPCNTDQVL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1558 ; AVX512VPOPCNTDQVL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
1559 ; AVX512VPOPCNTDQVL-NEXT: retq
1561 ; BITALG_NOVLX-LABEL: ugt_1_v8i16:
1562 ; BITALG_NOVLX: # %bb.0:
1563 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1564 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
1565 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1566 ; BITALG_NOVLX-NEXT: vzeroupper
1567 ; BITALG_NOVLX-NEXT: retq
1569 ; BITALG-LABEL: ugt_1_v8i16:
1571 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
1572 ; BITALG-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1574 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
1575 %3 = icmp ugt <8 x i16> %2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1576 %4 = sext <8 x i1> %3 to <8 x i16>
; ult_2_v8i16 - per-lane test "ctpop(x) < 2" on <8 x i16>.
; The IR compares the population count of %0 unsigned-less-than against a
; splat of 2 and sign-extends the <8 x i1> result to <8 x i16>.
; The shared SSE and AVX prefixes expect no popcount instruction. They form
; x & (x + all-ones) and compare it with zero. The two BITALG prefixes expect
; vpopcntw, a broadcast of the constant 2, and a vpcmpgtw with the constant as
; the greater-than side.
1580 define <8 x i16> @ult_2_v8i16(<8 x i16> %0) {
1581 ; SSE-LABEL: ult_2_v8i16:
1583 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
1584 ; SSE-NEXT: paddw %xmm0, %xmm1
1585 ; SSE-NEXT: pand %xmm1, %xmm0
1586 ; SSE-NEXT: pxor %xmm1, %xmm1
1587 ; SSE-NEXT: pcmpeqw %xmm1, %xmm0
1590 ; AVX-LABEL: ult_2_v8i16:
1592 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1593 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm1
1594 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
1595 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
1596 ; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
1599 ; BITALG_NOVLX-LABEL: ult_2_v8i16:
1600 ; BITALG_NOVLX: # %bb.0:
1601 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1602 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
1603 ; BITALG_NOVLX-NEXT: vpbroadcastw {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2]
1604 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1605 ; BITALG_NOVLX-NEXT: vzeroupper
1606 ; BITALG_NOVLX-NEXT: retq
1608 ; BITALG-LABEL: ult_2_v8i16:
1610 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
1611 ; BITALG-NEXT: vpbroadcastw {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2]
1612 ; BITALG-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1614 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
1615 %3 = icmp ult <8 x i16> %2, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
1616 %4 = sext <8 x i1> %3 to <8 x i16>
; ugt_2_v8i16 - per-lane test "ctpop(x) > 2" on <8 x i16>.
; The IR takes the population count of %0, compares it unsigned-greater-than
; against a splat of 2, and sign-extends the <8 x i1> result to <8 x i16>.
; Expected lowering, as recorded by the autogenerated checks below,
; - SSE2 and SSE3 expect a shift-and-mask popcount (psrlw/pand/psubb/paddb
;   using the 51 splat and constant-pool masks).
; - SSSE3, SSE41, AVX1 and AVX2 expect a pshufb lookup of the low and high
;   nibbles in the table [0,1,1,2,...,4], with the two lookups added per byte.
; - In both of those cases the per-byte counts are then folded into each
;   16-bit lane by the shift-left-8 / paddb / shift-right-8 sequence.
; - The AVX512VPOPCNTDQ prefixes expect zero-extension to 32-bit lanes
;   (vpmovzxwd), vpopcntd, and truncation back with vpmovdw. Without VL the
;   vpopcntd is expected on zmm0, with VL on ymm0.
; - BITALG_NOVLX and BITALG expect vpopcntw directly.
; - Every prefix finishes with pcmpgtw/vpcmpgtw against a constant-pool
;   operand.
1620 define <8 x i16> @ugt_2_v8i16(<8 x i16> %0) {
1621 ; SSE2-LABEL: ugt_2_v8i16:
1623 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1624 ; SSE2-NEXT: psrlw $1, %xmm1
1625 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1626 ; SSE2-NEXT: psubb %xmm1, %xmm0
1627 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1628 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1629 ; SSE2-NEXT: pand %xmm1, %xmm2
1630 ; SSE2-NEXT: psrlw $2, %xmm0
1631 ; SSE2-NEXT: pand %xmm1, %xmm0
1632 ; SSE2-NEXT: paddb %xmm2, %xmm0
1633 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1634 ; SSE2-NEXT: psrlw $4, %xmm1
1635 ; SSE2-NEXT: paddb %xmm1, %xmm0
1636 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1637 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1638 ; SSE2-NEXT: psllw $8, %xmm1
1639 ; SSE2-NEXT: paddb %xmm1, %xmm0
1640 ; SSE2-NEXT: psrlw $8, %xmm0
1641 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1644 ; SSE3-LABEL: ugt_2_v8i16:
1646 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1647 ; SSE3-NEXT: psrlw $1, %xmm1
1648 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1649 ; SSE3-NEXT: psubb %xmm1, %xmm0
1650 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1651 ; SSE3-NEXT: movdqa %xmm0, %xmm2
1652 ; SSE3-NEXT: pand %xmm1, %xmm2
1653 ; SSE3-NEXT: psrlw $2, %xmm0
1654 ; SSE3-NEXT: pand %xmm1, %xmm0
1655 ; SSE3-NEXT: paddb %xmm2, %xmm0
1656 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1657 ; SSE3-NEXT: psrlw $4, %xmm1
1658 ; SSE3-NEXT: paddb %xmm1, %xmm0
1659 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1660 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1661 ; SSE3-NEXT: psllw $8, %xmm1
1662 ; SSE3-NEXT: paddb %xmm1, %xmm0
1663 ; SSE3-NEXT: psrlw $8, %xmm0
1664 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1667 ; SSSE3-LABEL: ugt_2_v8i16:
1669 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1670 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
1671 ; SSSE3-NEXT: pand %xmm1, %xmm2
1672 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1673 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
1674 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
1675 ; SSSE3-NEXT: psrlw $4, %xmm0
1676 ; SSSE3-NEXT: pand %xmm1, %xmm0
1677 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
1678 ; SSSE3-NEXT: paddb %xmm4, %xmm3
1679 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
1680 ; SSSE3-NEXT: psllw $8, %xmm0
1681 ; SSSE3-NEXT: paddb %xmm3, %xmm0
1682 ; SSSE3-NEXT: psrlw $8, %xmm0
1683 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1686 ; SSE41-LABEL: ugt_2_v8i16:
1688 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1689 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1690 ; SSE41-NEXT: pand %xmm1, %xmm2
1691 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1692 ; SSE41-NEXT: movdqa %xmm3, %xmm4
1693 ; SSE41-NEXT: pshufb %xmm2, %xmm4
1694 ; SSE41-NEXT: psrlw $4, %xmm0
1695 ; SSE41-NEXT: pand %xmm1, %xmm0
1696 ; SSE41-NEXT: pshufb %xmm0, %xmm3
1697 ; SSE41-NEXT: paddb %xmm4, %xmm3
1698 ; SSE41-NEXT: movdqa %xmm3, %xmm0
1699 ; SSE41-NEXT: psllw $8, %xmm0
1700 ; SSE41-NEXT: paddb %xmm3, %xmm0
1701 ; SSE41-NEXT: psrlw $8, %xmm0
1702 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1705 ; AVX1-LABEL: ugt_2_v8i16:
1707 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1708 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
1709 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1710 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1711 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1712 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1713 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1714 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1715 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
1716 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
1717 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
1718 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1721 ; AVX2-LABEL: ugt_2_v8i16:
1723 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1724 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
1725 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1726 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1727 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
1728 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1729 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1730 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1731 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
1732 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
1733 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
1734 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1737 ; AVX512VPOPCNTDQ-LABEL: ugt_2_v8i16:
1738 ; AVX512VPOPCNTDQ: # %bb.0:
1739 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1740 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
1741 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
1742 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1743 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
1744 ; AVX512VPOPCNTDQ-NEXT: retq
1746 ; AVX512VPOPCNTDQVL-LABEL: ugt_2_v8i16:
1747 ; AVX512VPOPCNTDQVL: # %bb.0:
1748 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1749 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
1750 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
1751 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1752 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
1753 ; AVX512VPOPCNTDQVL-NEXT: retq
1755 ; BITALG_NOVLX-LABEL: ugt_2_v8i16:
1756 ; BITALG_NOVLX: # %bb.0:
1757 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1758 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
1759 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1760 ; BITALG_NOVLX-NEXT: vzeroupper
1761 ; BITALG_NOVLX-NEXT: retq
1763 ; BITALG-LABEL: ugt_2_v8i16:
1765 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
1766 ; BITALG-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1768 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
1769 %3 = icmp ugt <8 x i16> %2, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
1770 %4 = sext <8 x i1> %3 to <8 x i16>
; ult_3_v8i16 - per-lane test "ctpop(x) < 3" on <8 x i16>.
; The IR takes the population count of %0, compares it unsigned-less-than
; against a splat of 3, and sign-extends the <8 x i1> result to <8 x i16>.
; Expected lowering, as recorded by the autogenerated checks below,
; - SSE2 and SSE3 expect a shift-and-mask popcount, while SSSE3, SSE41, AVX1
;   and AVX2 expect a pshufb nibble lookup in the table [0,1,1,2,...,4].
;   Both then fold the per-byte counts into each 16-bit lane with the
;   shift-left-8 / paddb / shift-right-8 sequence.
; - The AVX512VPOPCNTDQ prefixes expect vpmovzxwd, vpopcntd and vpmovdw.
; - BITALG_NOVLX and BITALG expect vpopcntw directly.
; - Every prefix then loads the splat of 3 into a register and uses it as the
;   greater-than side of pcmpgtw/vpcmpgtw. The instruction that materializes
;   the constant differs by prefix - movdqa (SSE2, SSE3, SSSE3), pmovsxbw
;   (SSE41), vbroadcastss (AVX1), vpbroadcastd (AVX512VPOPCNTDQVL) and
;   vpbroadcastw for the remaining prefixes.
1774 define <8 x i16> @ult_3_v8i16(<8 x i16> %0) {
1775 ; SSE2-LABEL: ult_3_v8i16:
1777 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1778 ; SSE2-NEXT: psrlw $1, %xmm1
1779 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1780 ; SSE2-NEXT: psubb %xmm1, %xmm0
1781 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1782 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1783 ; SSE2-NEXT: pand %xmm1, %xmm2
1784 ; SSE2-NEXT: psrlw $2, %xmm0
1785 ; SSE2-NEXT: pand %xmm1, %xmm0
1786 ; SSE2-NEXT: paddb %xmm2, %xmm0
1787 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1788 ; SSE2-NEXT: psrlw $4, %xmm1
1789 ; SSE2-NEXT: paddb %xmm0, %xmm1
1790 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1791 ; SSE2-NEXT: movdqa %xmm1, %xmm2
1792 ; SSE2-NEXT: psllw $8, %xmm2
1793 ; SSE2-NEXT: paddb %xmm1, %xmm2
1794 ; SSE2-NEXT: psrlw $8, %xmm2
1795 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3]
1796 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
1799 ; SSE3-LABEL: ult_3_v8i16:
1801 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1802 ; SSE3-NEXT: psrlw $1, %xmm1
1803 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1804 ; SSE3-NEXT: psubb %xmm1, %xmm0
1805 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1806 ; SSE3-NEXT: movdqa %xmm0, %xmm2
1807 ; SSE3-NEXT: pand %xmm1, %xmm2
1808 ; SSE3-NEXT: psrlw $2, %xmm0
1809 ; SSE3-NEXT: pand %xmm1, %xmm0
1810 ; SSE3-NEXT: paddb %xmm2, %xmm0
1811 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1812 ; SSE3-NEXT: psrlw $4, %xmm1
1813 ; SSE3-NEXT: paddb %xmm0, %xmm1
1814 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1815 ; SSE3-NEXT: movdqa %xmm1, %xmm2
1816 ; SSE3-NEXT: psllw $8, %xmm2
1817 ; SSE3-NEXT: paddb %xmm1, %xmm2
1818 ; SSE3-NEXT: psrlw $8, %xmm2
1819 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3]
1820 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
1823 ; SSSE3-LABEL: ult_3_v8i16:
1825 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1826 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
1827 ; SSSE3-NEXT: pand %xmm1, %xmm2
1828 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1829 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
1830 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
1831 ; SSSE3-NEXT: psrlw $4, %xmm0
1832 ; SSSE3-NEXT: pand %xmm1, %xmm0
1833 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
1834 ; SSSE3-NEXT: paddb %xmm4, %xmm3
1835 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
1836 ; SSSE3-NEXT: psllw $8, %xmm1
1837 ; SSSE3-NEXT: paddb %xmm3, %xmm1
1838 ; SSSE3-NEXT: psrlw $8, %xmm1
1839 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3]
1840 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
1843 ; SSE41-LABEL: ult_3_v8i16:
1845 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1846 ; SSE41-NEXT: movdqa %xmm0, %xmm2
1847 ; SSE41-NEXT: pand %xmm1, %xmm2
1848 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1849 ; SSE41-NEXT: movdqa %xmm3, %xmm4
1850 ; SSE41-NEXT: pshufb %xmm2, %xmm4
1851 ; SSE41-NEXT: psrlw $4, %xmm0
1852 ; SSE41-NEXT: pand %xmm1, %xmm0
1853 ; SSE41-NEXT: pshufb %xmm0, %xmm3
1854 ; SSE41-NEXT: paddb %xmm4, %xmm3
1855 ; SSE41-NEXT: movdqa %xmm3, %xmm1
1856 ; SSE41-NEXT: psllw $8, %xmm1
1857 ; SSE41-NEXT: paddb %xmm3, %xmm1
1858 ; SSE41-NEXT: psrlw $8, %xmm1
1859 ; SSE41-NEXT: pmovsxbw {{.*#+}} xmm0 = [3,3,3,3,3,3,3,3]
1860 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
1863 ; AVX1-LABEL: ult_3_v8i16:
1865 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1866 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
1867 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1868 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1869 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
1870 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
1871 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1872 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1873 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
1874 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
1875 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
1876 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
1877 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1880 ; AVX2-LABEL: ult_3_v8i16:
1882 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1883 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
1884 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1885 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
1886 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
1887 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
1888 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
1889 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
1890 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
1891 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
1892 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
1893 ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
1894 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1897 ; AVX512VPOPCNTDQ-LABEL: ult_3_v8i16:
1898 ; AVX512VPOPCNTDQ: # %bb.0:
1899 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1900 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
1901 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
1902 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastw {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
1903 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1904 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
1905 ; AVX512VPOPCNTDQ-NEXT: retq
1907 ; AVX512VPOPCNTDQVL-LABEL: ult_3_v8i16:
1908 ; AVX512VPOPCNTDQVL: # %bb.0:
1909 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1910 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
1911 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
1912 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
1913 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1914 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
1915 ; AVX512VPOPCNTDQVL-NEXT: retq
1917 ; BITALG_NOVLX-LABEL: ult_3_v8i16:
1918 ; BITALG_NOVLX: # %bb.0:
1919 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
1920 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
1921 ; BITALG_NOVLX-NEXT: vpbroadcastw {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
1922 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1923 ; BITALG_NOVLX-NEXT: vzeroupper
1924 ; BITALG_NOVLX-NEXT: retq
1926 ; BITALG-LABEL: ult_3_v8i16:
1928 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
1929 ; BITALG-NEXT: vpbroadcastw {{.*#+}} xmm1 = [3,3,3,3,3,3,3,3]
1930 ; BITALG-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1932 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
1933 %3 = icmp ult <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
1934 %4 = sext <8 x i1> %3 to <8 x i16>
; ugt_3_v8i16 - per-lane test "ctpop(x) > 3" on <8 x i16>.
; The IR takes the population count of %0, compares it unsigned-greater-than
; against a splat of 3, and sign-extends the <8 x i1> result to <8 x i16>.
; Expected lowering, as recorded by the autogenerated checks below,
; - SSE2 and SSE3 expect a shift-and-mask popcount (psrlw/pand/psubb/paddb
;   using the 51 splat and constant-pool masks).
; - SSSE3, SSE41, AVX1 and AVX2 expect a pshufb lookup of the low and high
;   nibbles in the table [0,1,1,2,...,4], with the two lookups added per byte.
; - In both of those cases the per-byte counts are then folded into each
;   16-bit lane by the shift-left-8 / paddb / shift-right-8 sequence.
; - The AVX512VPOPCNTDQ prefixes expect zero-extension to 32-bit lanes
;   (vpmovzxwd), vpopcntd, and truncation back with vpmovdw. Without VL the
;   vpopcntd is expected on zmm0, with VL on ymm0.
; - BITALG_NOVLX and BITALG expect vpopcntw directly.
; - Every prefix finishes with pcmpgtw/vpcmpgtw against a constant-pool
;   operand.
1938 define <8 x i16> @ugt_3_v8i16(<8 x i16> %0) {
1939 ; SSE2-LABEL: ugt_3_v8i16:
1941 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1942 ; SSE2-NEXT: psrlw $1, %xmm1
1943 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1944 ; SSE2-NEXT: psubb %xmm1, %xmm0
1945 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1946 ; SSE2-NEXT: movdqa %xmm0, %xmm2
1947 ; SSE2-NEXT: pand %xmm1, %xmm2
1948 ; SSE2-NEXT: psrlw $2, %xmm0
1949 ; SSE2-NEXT: pand %xmm1, %xmm0
1950 ; SSE2-NEXT: paddb %xmm2, %xmm0
1951 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1952 ; SSE2-NEXT: psrlw $4, %xmm1
1953 ; SSE2-NEXT: paddb %xmm1, %xmm0
1954 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1955 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1956 ; SSE2-NEXT: psllw $8, %xmm1
1957 ; SSE2-NEXT: paddb %xmm1, %xmm0
1958 ; SSE2-NEXT: psrlw $8, %xmm0
1959 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1962 ; SSE3-LABEL: ugt_3_v8i16:
1964 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1965 ; SSE3-NEXT: psrlw $1, %xmm1
1966 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1967 ; SSE3-NEXT: psubb %xmm1, %xmm0
1968 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1969 ; SSE3-NEXT: movdqa %xmm0, %xmm2
1970 ; SSE3-NEXT: pand %xmm1, %xmm2
1971 ; SSE3-NEXT: psrlw $2, %xmm0
1972 ; SSE3-NEXT: pand %xmm1, %xmm0
1973 ; SSE3-NEXT: paddb %xmm2, %xmm0
1974 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1975 ; SSE3-NEXT: psrlw $4, %xmm1
1976 ; SSE3-NEXT: paddb %xmm1, %xmm0
1977 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1978 ; SSE3-NEXT: movdqa %xmm0, %xmm1
1979 ; SSE3-NEXT: psllw $8, %xmm1
1980 ; SSE3-NEXT: paddb %xmm1, %xmm0
1981 ; SSE3-NEXT: psrlw $8, %xmm0
1982 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1985 ; SSSE3-LABEL: ugt_3_v8i16:
1987 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1988 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
1989 ; SSSE3-NEXT: pand %xmm1, %xmm2
1990 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1991 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
1992 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
1993 ; SSSE3-NEXT: psrlw $4, %xmm0
1994 ; SSSE3-NEXT: pand %xmm1, %xmm0
1995 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
1996 ; SSSE3-NEXT: paddb %xmm4, %xmm3
1997 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
1998 ; SSSE3-NEXT: psllw $8, %xmm0
1999 ; SSSE3-NEXT: paddb %xmm3, %xmm0
2000 ; SSSE3-NEXT: psrlw $8, %xmm0
2001 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2004 ; SSE41-LABEL: ugt_3_v8i16:
2006 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2007 ; SSE41-NEXT: movdqa %xmm0, %xmm2
2008 ; SSE41-NEXT: pand %xmm1, %xmm2
2009 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2010 ; SSE41-NEXT: movdqa %xmm3, %xmm4
2011 ; SSE41-NEXT: pshufb %xmm2, %xmm4
2012 ; SSE41-NEXT: psrlw $4, %xmm0
2013 ; SSE41-NEXT: pand %xmm1, %xmm0
2014 ; SSE41-NEXT: pshufb %xmm0, %xmm3
2015 ; SSE41-NEXT: paddb %xmm4, %xmm3
2016 ; SSE41-NEXT: movdqa %xmm3, %xmm0
2017 ; SSE41-NEXT: psllw $8, %xmm0
2018 ; SSE41-NEXT: paddb %xmm3, %xmm0
2019 ; SSE41-NEXT: psrlw $8, %xmm0
2020 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2023 ; AVX1-LABEL: ugt_3_v8i16:
2025 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2026 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
2027 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2028 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2029 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
2030 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2031 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2032 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2033 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
2034 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2035 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
2036 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2039 ; AVX2-LABEL: ugt_3_v8i16:
2041 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2042 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
2043 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2044 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2045 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
2046 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2047 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2048 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2049 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
2050 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2051 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
2052 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2055 ; AVX512VPOPCNTDQ-LABEL: ugt_3_v8i16:
2056 ; AVX512VPOPCNTDQ: # %bb.0:
2057 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2058 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
2059 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
2060 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2061 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
2062 ; AVX512VPOPCNTDQ-NEXT: retq
2064 ; AVX512VPOPCNTDQVL-LABEL: ugt_3_v8i16:
2065 ; AVX512VPOPCNTDQVL: # %bb.0:
2066 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2067 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
2068 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
2069 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2070 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
2071 ; AVX512VPOPCNTDQVL-NEXT: retq
2073 ; BITALG_NOVLX-LABEL: ugt_3_v8i16:
2074 ; BITALG_NOVLX: # %bb.0:
2075 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2076 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
2077 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2078 ; BITALG_NOVLX-NEXT: vzeroupper
2079 ; BITALG_NOVLX-NEXT: retq
2081 ; BITALG-LABEL: ugt_3_v8i16:
2083 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
2084 ; BITALG-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2086 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2087 %3 = icmp ugt <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
2088 %4 = sext <8 x i1> %3 to <8 x i16>
; ult_4_v8i16 - per-lane test "ctpop(x) < 4" on <8 x i16>.
; The IR takes the population count of %0, compares it unsigned-less-than
; against a splat of 4, and sign-extends the <8 x i1> result to <8 x i16>.
; Expected lowering, as recorded by the autogenerated checks below,
; - SSE2 and SSE3 expect a shift-and-mask popcount, while SSSE3, SSE41, AVX1
;   and AVX2 expect a pshufb nibble lookup in the table [0,1,1,2,...,4].
;   Both then fold the per-byte counts into each 16-bit lane with the
;   shift-left-8 / paddb / shift-right-8 sequence.
; - The AVX512VPOPCNTDQ prefixes expect vpmovzxwd, vpopcntd and vpmovdw.
; - BITALG_NOVLX and BITALG expect vpopcntw directly.
; - Every prefix then loads the splat of 4 into a register and uses it as the
;   greater-than side of pcmpgtw/vpcmpgtw. The instruction that materializes
;   the constant differs by prefix - movdqa (SSE2, SSE3, SSSE3), pmovsxbw
;   (SSE41), vbroadcastss (AVX1), vpbroadcastd (AVX512VPOPCNTDQVL) and
;   vpbroadcastw for the remaining prefixes.
2092 define <8 x i16> @ult_4_v8i16(<8 x i16> %0) {
2093 ; SSE2-LABEL: ult_4_v8i16:
2095 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2096 ; SSE2-NEXT: psrlw $1, %xmm1
2097 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2098 ; SSE2-NEXT: psubb %xmm1, %xmm0
2099 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2100 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2101 ; SSE2-NEXT: pand %xmm1, %xmm2
2102 ; SSE2-NEXT: psrlw $2, %xmm0
2103 ; SSE2-NEXT: pand %xmm1, %xmm0
2104 ; SSE2-NEXT: paddb %xmm2, %xmm0
2105 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2106 ; SSE2-NEXT: psrlw $4, %xmm1
2107 ; SSE2-NEXT: paddb %xmm0, %xmm1
2108 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2109 ; SSE2-NEXT: movdqa %xmm1, %xmm2
2110 ; SSE2-NEXT: psllw $8, %xmm2
2111 ; SSE2-NEXT: paddb %xmm1, %xmm2
2112 ; SSE2-NEXT: psrlw $8, %xmm2
2113 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4]
2114 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
2117 ; SSE3-LABEL: ult_4_v8i16:
2119 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2120 ; SSE3-NEXT: psrlw $1, %xmm1
2121 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2122 ; SSE3-NEXT: psubb %xmm1, %xmm0
2123 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2124 ; SSE3-NEXT: movdqa %xmm0, %xmm2
2125 ; SSE3-NEXT: pand %xmm1, %xmm2
2126 ; SSE3-NEXT: psrlw $2, %xmm0
2127 ; SSE3-NEXT: pand %xmm1, %xmm0
2128 ; SSE3-NEXT: paddb %xmm2, %xmm0
2129 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2130 ; SSE3-NEXT: psrlw $4, %xmm1
2131 ; SSE3-NEXT: paddb %xmm0, %xmm1
2132 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2133 ; SSE3-NEXT: movdqa %xmm1, %xmm2
2134 ; SSE3-NEXT: psllw $8, %xmm2
2135 ; SSE3-NEXT: paddb %xmm1, %xmm2
2136 ; SSE3-NEXT: psrlw $8, %xmm2
2137 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4]
2138 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
2141 ; SSSE3-LABEL: ult_4_v8i16:
2143 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2144 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
2145 ; SSSE3-NEXT: pand %xmm1, %xmm2
2146 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2147 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
2148 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
2149 ; SSSE3-NEXT: psrlw $4, %xmm0
2150 ; SSSE3-NEXT: pand %xmm1, %xmm0
2151 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
2152 ; SSSE3-NEXT: paddb %xmm4, %xmm3
2153 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
2154 ; SSSE3-NEXT: psllw $8, %xmm1
2155 ; SSSE3-NEXT: paddb %xmm3, %xmm1
2156 ; SSSE3-NEXT: psrlw $8, %xmm1
2157 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4]
2158 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
2161 ; SSE41-LABEL: ult_4_v8i16:
2163 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2164 ; SSE41-NEXT: movdqa %xmm0, %xmm2
2165 ; SSE41-NEXT: pand %xmm1, %xmm2
2166 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2167 ; SSE41-NEXT: movdqa %xmm3, %xmm4
2168 ; SSE41-NEXT: pshufb %xmm2, %xmm4
2169 ; SSE41-NEXT: psrlw $4, %xmm0
2170 ; SSE41-NEXT: pand %xmm1, %xmm0
2171 ; SSE41-NEXT: pshufb %xmm0, %xmm3
2172 ; SSE41-NEXT: paddb %xmm4, %xmm3
2173 ; SSE41-NEXT: movdqa %xmm3, %xmm1
2174 ; SSE41-NEXT: psllw $8, %xmm1
2175 ; SSE41-NEXT: paddb %xmm3, %xmm1
2176 ; SSE41-NEXT: psrlw $8, %xmm1
2177 ; SSE41-NEXT: pmovsxbw {{.*#+}} xmm0 = [4,4,4,4,4,4,4,4]
2178 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
2181 ; AVX1-LABEL: ult_4_v8i16:
2183 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2184 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
2185 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2186 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2187 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
2188 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2189 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2190 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2191 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
2192 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2193 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
2194 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2195 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2198 ; AVX2-LABEL: ult_4_v8i16:
2200 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2201 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
2202 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2203 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2204 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
2205 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2206 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2207 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2208 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
2209 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2210 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
2211 ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2212 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2215 ; AVX512VPOPCNTDQ-LABEL: ult_4_v8i16:
2216 ; AVX512VPOPCNTDQ: # %bb.0:
2217 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2218 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
2219 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
2220 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastw {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2221 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2222 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
2223 ; AVX512VPOPCNTDQ-NEXT: retq
2225 ; AVX512VPOPCNTDQVL-LABEL: ult_4_v8i16:
2226 ; AVX512VPOPCNTDQVL: # %bb.0:
2227 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2228 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
2229 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
2230 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2231 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2232 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
2233 ; AVX512VPOPCNTDQVL-NEXT: retq
2235 ; BITALG_NOVLX-LABEL: ult_4_v8i16:
2236 ; BITALG_NOVLX: # %bb.0:
2237 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2238 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
2239 ; BITALG_NOVLX-NEXT: vpbroadcastw {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2240 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2241 ; BITALG_NOVLX-NEXT: vzeroupper
2242 ; BITALG_NOVLX-NEXT: retq
2244 ; BITALG-LABEL: ult_4_v8i16:
2246 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
2247 ; BITALG-NEXT: vpbroadcastw {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4]
2248 ; BITALG-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2250 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2251 %3 = icmp ult <8 x i16> %2, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2252 %4 = sext <8 x i1> %3 to <8 x i16>
; ugt_4_v8i16: sign-extends the per-lane result of ctpop(%0) >u 4, so each
; i16 lane of the result is all-ones when the input lane has more than 4 bits
; set and zero otherwise.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
2256 define <8 x i16> @ugt_4_v8i16(<8 x i16> %0) {
2257 ; SSE2-LABEL: ugt_4_v8i16:
2259 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2260 ; SSE2-NEXT: psrlw $1, %xmm1
2261 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2262 ; SSE2-NEXT: psubb %xmm1, %xmm0
2263 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2264 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2265 ; SSE2-NEXT: pand %xmm1, %xmm2
2266 ; SSE2-NEXT: psrlw $2, %xmm0
2267 ; SSE2-NEXT: pand %xmm1, %xmm0
2268 ; SSE2-NEXT: paddb %xmm2, %xmm0
2269 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2270 ; SSE2-NEXT: psrlw $4, %xmm1
2271 ; SSE2-NEXT: paddb %xmm1, %xmm0
2272 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2273 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2274 ; SSE2-NEXT: psllw $8, %xmm1
2275 ; SSE2-NEXT: paddb %xmm1, %xmm0
2276 ; SSE2-NEXT: psrlw $8, %xmm0
2277 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2280 ; SSE3-LABEL: ugt_4_v8i16:
2282 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2283 ; SSE3-NEXT: psrlw $1, %xmm1
2284 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2285 ; SSE3-NEXT: psubb %xmm1, %xmm0
2286 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2287 ; SSE3-NEXT: movdqa %xmm0, %xmm2
2288 ; SSE3-NEXT: pand %xmm1, %xmm2
2289 ; SSE3-NEXT: psrlw $2, %xmm0
2290 ; SSE3-NEXT: pand %xmm1, %xmm0
2291 ; SSE3-NEXT: paddb %xmm2, %xmm0
2292 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2293 ; SSE3-NEXT: psrlw $4, %xmm1
2294 ; SSE3-NEXT: paddb %xmm1, %xmm0
2295 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2296 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2297 ; SSE3-NEXT: psllw $8, %xmm1
2298 ; SSE3-NEXT: paddb %xmm1, %xmm0
2299 ; SSE3-NEXT: psrlw $8, %xmm0
2300 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2303 ; SSSE3-LABEL: ugt_4_v8i16:
2305 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2306 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
2307 ; SSSE3-NEXT: pand %xmm1, %xmm2
2308 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2309 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
2310 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
2311 ; SSSE3-NEXT: psrlw $4, %xmm0
2312 ; SSSE3-NEXT: pand %xmm1, %xmm0
2313 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
2314 ; SSSE3-NEXT: paddb %xmm4, %xmm3
2315 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
2316 ; SSSE3-NEXT: psllw $8, %xmm0
2317 ; SSSE3-NEXT: paddb %xmm3, %xmm0
2318 ; SSSE3-NEXT: psrlw $8, %xmm0
2319 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2322 ; SSE41-LABEL: ugt_4_v8i16:
2324 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2325 ; SSE41-NEXT: movdqa %xmm0, %xmm2
2326 ; SSE41-NEXT: pand %xmm1, %xmm2
2327 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2328 ; SSE41-NEXT: movdqa %xmm3, %xmm4
2329 ; SSE41-NEXT: pshufb %xmm2, %xmm4
2330 ; SSE41-NEXT: psrlw $4, %xmm0
2331 ; SSE41-NEXT: pand %xmm1, %xmm0
2332 ; SSE41-NEXT: pshufb %xmm0, %xmm3
2333 ; SSE41-NEXT: paddb %xmm4, %xmm3
2334 ; SSE41-NEXT: movdqa %xmm3, %xmm0
2335 ; SSE41-NEXT: psllw $8, %xmm0
2336 ; SSE41-NEXT: paddb %xmm3, %xmm0
2337 ; SSE41-NEXT: psrlw $8, %xmm0
2338 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2341 ; AVX1-LABEL: ugt_4_v8i16:
2343 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2344 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
2345 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2346 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2347 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
2348 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2349 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2350 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2351 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
2352 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2353 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
2354 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2357 ; AVX2-LABEL: ugt_4_v8i16:
2359 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2360 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
2361 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2362 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2363 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
2364 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2365 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2366 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2367 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
2368 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2369 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
2370 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2373 ; AVX512VPOPCNTDQ-LABEL: ugt_4_v8i16:
2374 ; AVX512VPOPCNTDQ: # %bb.0:
2375 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2376 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
2377 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
2378 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2379 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
2380 ; AVX512VPOPCNTDQ-NEXT: retq
2382 ; AVX512VPOPCNTDQVL-LABEL: ugt_4_v8i16:
2383 ; AVX512VPOPCNTDQVL: # %bb.0:
2384 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2385 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
2386 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
2387 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2388 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
2389 ; AVX512VPOPCNTDQVL-NEXT: retq
2391 ; BITALG_NOVLX-LABEL: ugt_4_v8i16:
2392 ; BITALG_NOVLX: # %bb.0:
2393 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2394 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
2395 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2396 ; BITALG_NOVLX-NEXT: vzeroupper
2397 ; BITALG_NOVLX-NEXT: retq
2399 ; BITALG-LABEL: ugt_4_v8i16:
2401 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
2402 ; BITALG-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; IR under test: per-lane population count, unsigned greater-than compare
; against a splat of 4, then sext of the <8 x i1> mask to <8 x i16>.
2404 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2405 %3 = icmp ugt <8 x i16> %2, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
2406 %4 = sext <8 x i1> %3 to <8 x i16>
; ult_5_v8i16: sign-extends the per-lane result of ctpop(%0) <u 5, so each
; i16 lane of the result is all-ones when the input lane has fewer than 5 bits
; set and zero otherwise.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
2410 define <8 x i16> @ult_5_v8i16(<8 x i16> %0) {
2411 ; SSE2-LABEL: ult_5_v8i16:
2413 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2414 ; SSE2-NEXT: psrlw $1, %xmm1
2415 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2416 ; SSE2-NEXT: psubb %xmm1, %xmm0
2417 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2418 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2419 ; SSE2-NEXT: pand %xmm1, %xmm2
2420 ; SSE2-NEXT: psrlw $2, %xmm0
2421 ; SSE2-NEXT: pand %xmm1, %xmm0
2422 ; SSE2-NEXT: paddb %xmm2, %xmm0
2423 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2424 ; SSE2-NEXT: psrlw $4, %xmm1
2425 ; SSE2-NEXT: paddb %xmm0, %xmm1
2426 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2427 ; SSE2-NEXT: movdqa %xmm1, %xmm2
2428 ; SSE2-NEXT: psllw $8, %xmm2
2429 ; SSE2-NEXT: paddb %xmm1, %xmm2
2430 ; SSE2-NEXT: psrlw $8, %xmm2
2431 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5]
2432 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
2435 ; SSE3-LABEL: ult_5_v8i16:
2437 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2438 ; SSE3-NEXT: psrlw $1, %xmm1
2439 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2440 ; SSE3-NEXT: psubb %xmm1, %xmm0
2441 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2442 ; SSE3-NEXT: movdqa %xmm0, %xmm2
2443 ; SSE3-NEXT: pand %xmm1, %xmm2
2444 ; SSE3-NEXT: psrlw $2, %xmm0
2445 ; SSE3-NEXT: pand %xmm1, %xmm0
2446 ; SSE3-NEXT: paddb %xmm2, %xmm0
2447 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2448 ; SSE3-NEXT: psrlw $4, %xmm1
2449 ; SSE3-NEXT: paddb %xmm0, %xmm1
2450 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2451 ; SSE3-NEXT: movdqa %xmm1, %xmm2
2452 ; SSE3-NEXT: psllw $8, %xmm2
2453 ; SSE3-NEXT: paddb %xmm1, %xmm2
2454 ; SSE3-NEXT: psrlw $8, %xmm2
2455 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5]
2456 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
2459 ; SSSE3-LABEL: ult_5_v8i16:
2461 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2462 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
2463 ; SSSE3-NEXT: pand %xmm1, %xmm2
2464 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2465 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
2466 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
2467 ; SSSE3-NEXT: psrlw $4, %xmm0
2468 ; SSSE3-NEXT: pand %xmm1, %xmm0
2469 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
2470 ; SSSE3-NEXT: paddb %xmm4, %xmm3
2471 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
2472 ; SSSE3-NEXT: psllw $8, %xmm1
2473 ; SSSE3-NEXT: paddb %xmm3, %xmm1
2474 ; SSSE3-NEXT: psrlw $8, %xmm1
2475 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5]
2476 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
2479 ; SSE41-LABEL: ult_5_v8i16:
2481 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2482 ; SSE41-NEXT: movdqa %xmm0, %xmm2
2483 ; SSE41-NEXT: pand %xmm1, %xmm2
2484 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2485 ; SSE41-NEXT: movdqa %xmm3, %xmm4
2486 ; SSE41-NEXT: pshufb %xmm2, %xmm4
2487 ; SSE41-NEXT: psrlw $4, %xmm0
2488 ; SSE41-NEXT: pand %xmm1, %xmm0
2489 ; SSE41-NEXT: pshufb %xmm0, %xmm3
2490 ; SSE41-NEXT: paddb %xmm4, %xmm3
2491 ; SSE41-NEXT: movdqa %xmm3, %xmm1
2492 ; SSE41-NEXT: psllw $8, %xmm1
2493 ; SSE41-NEXT: paddb %xmm3, %xmm1
2494 ; SSE41-NEXT: psrlw $8, %xmm1
2495 ; SSE41-NEXT: pmovsxbw {{.*#+}} xmm0 = [5,5,5,5,5,5,5,5]
2496 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
2499 ; AVX1-LABEL: ult_5_v8i16:
2501 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2502 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
2503 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2504 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2505 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
2506 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2507 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2508 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2509 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
2510 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2511 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
2512 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2513 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2516 ; AVX2-LABEL: ult_5_v8i16:
2518 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2519 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
2520 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2521 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2522 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
2523 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2524 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2525 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2526 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
2527 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2528 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
2529 ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2530 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2533 ; AVX512VPOPCNTDQ-LABEL: ult_5_v8i16:
2534 ; AVX512VPOPCNTDQ: # %bb.0:
2535 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2536 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
2537 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
2538 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastw {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2539 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2540 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
2541 ; AVX512VPOPCNTDQ-NEXT: retq
2543 ; AVX512VPOPCNTDQVL-LABEL: ult_5_v8i16:
2544 ; AVX512VPOPCNTDQVL: # %bb.0:
2545 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2546 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
2547 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
2548 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2549 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2550 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
2551 ; AVX512VPOPCNTDQVL-NEXT: retq
2553 ; BITALG_NOVLX-LABEL: ult_5_v8i16:
2554 ; BITALG_NOVLX: # %bb.0:
2555 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2556 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
2557 ; BITALG_NOVLX-NEXT: vpbroadcastw {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2558 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2559 ; BITALG_NOVLX-NEXT: vzeroupper
2560 ; BITALG_NOVLX-NEXT: retq
2562 ; BITALG-LABEL: ult_5_v8i16:
2564 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
2565 ; BITALG-NEXT: vpbroadcastw {{.*#+}} xmm1 = [5,5,5,5,5,5,5,5]
2566 ; BITALG-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; IR under test: per-lane population count, unsigned less-than compare
; against a splat of 5, then sext of the <8 x i1> mask to <8 x i16>.
2568 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2569 %3 = icmp ult <8 x i16> %2, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
2570 %4 = sext <8 x i1> %3 to <8 x i16>
; ugt_5_v8i16: sign-extends the per-lane result of ctpop(%0) >u 5, so each
; i16 lane of the result is all-ones when the input lane has more than 5 bits
; set and zero otherwise.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
2574 define <8 x i16> @ugt_5_v8i16(<8 x i16> %0) {
2575 ; SSE2-LABEL: ugt_5_v8i16:
2577 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2578 ; SSE2-NEXT: psrlw $1, %xmm1
2579 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2580 ; SSE2-NEXT: psubb %xmm1, %xmm0
2581 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2582 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2583 ; SSE2-NEXT: pand %xmm1, %xmm2
2584 ; SSE2-NEXT: psrlw $2, %xmm0
2585 ; SSE2-NEXT: pand %xmm1, %xmm0
2586 ; SSE2-NEXT: paddb %xmm2, %xmm0
2587 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2588 ; SSE2-NEXT: psrlw $4, %xmm1
2589 ; SSE2-NEXT: paddb %xmm1, %xmm0
2590 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2591 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2592 ; SSE2-NEXT: psllw $8, %xmm1
2593 ; SSE2-NEXT: paddb %xmm1, %xmm0
2594 ; SSE2-NEXT: psrlw $8, %xmm0
2595 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2598 ; SSE3-LABEL: ugt_5_v8i16:
2600 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2601 ; SSE3-NEXT: psrlw $1, %xmm1
2602 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2603 ; SSE3-NEXT: psubb %xmm1, %xmm0
2604 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2605 ; SSE3-NEXT: movdqa %xmm0, %xmm2
2606 ; SSE3-NEXT: pand %xmm1, %xmm2
2607 ; SSE3-NEXT: psrlw $2, %xmm0
2608 ; SSE3-NEXT: pand %xmm1, %xmm0
2609 ; SSE3-NEXT: paddb %xmm2, %xmm0
2610 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2611 ; SSE3-NEXT: psrlw $4, %xmm1
2612 ; SSE3-NEXT: paddb %xmm1, %xmm0
2613 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2614 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2615 ; SSE3-NEXT: psllw $8, %xmm1
2616 ; SSE3-NEXT: paddb %xmm1, %xmm0
2617 ; SSE3-NEXT: psrlw $8, %xmm0
2618 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2621 ; SSSE3-LABEL: ugt_5_v8i16:
2623 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2624 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
2625 ; SSSE3-NEXT: pand %xmm1, %xmm2
2626 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2627 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
2628 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
2629 ; SSSE3-NEXT: psrlw $4, %xmm0
2630 ; SSSE3-NEXT: pand %xmm1, %xmm0
2631 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
2632 ; SSSE3-NEXT: paddb %xmm4, %xmm3
2633 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
2634 ; SSSE3-NEXT: psllw $8, %xmm0
2635 ; SSSE3-NEXT: paddb %xmm3, %xmm0
2636 ; SSSE3-NEXT: psrlw $8, %xmm0
2637 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2640 ; SSE41-LABEL: ugt_5_v8i16:
2642 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2643 ; SSE41-NEXT: movdqa %xmm0, %xmm2
2644 ; SSE41-NEXT: pand %xmm1, %xmm2
2645 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2646 ; SSE41-NEXT: movdqa %xmm3, %xmm4
2647 ; SSE41-NEXT: pshufb %xmm2, %xmm4
2648 ; SSE41-NEXT: psrlw $4, %xmm0
2649 ; SSE41-NEXT: pand %xmm1, %xmm0
2650 ; SSE41-NEXT: pshufb %xmm0, %xmm3
2651 ; SSE41-NEXT: paddb %xmm4, %xmm3
2652 ; SSE41-NEXT: movdqa %xmm3, %xmm0
2653 ; SSE41-NEXT: psllw $8, %xmm0
2654 ; SSE41-NEXT: paddb %xmm3, %xmm0
2655 ; SSE41-NEXT: psrlw $8, %xmm0
2656 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2659 ; AVX1-LABEL: ugt_5_v8i16:
2661 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2662 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
2663 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2664 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2665 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
2666 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2667 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2668 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2669 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
2670 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2671 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
2672 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2675 ; AVX2-LABEL: ugt_5_v8i16:
2677 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2678 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
2679 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2680 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2681 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
2682 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2683 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2684 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2685 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
2686 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2687 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
2688 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2691 ; AVX512VPOPCNTDQ-LABEL: ugt_5_v8i16:
2692 ; AVX512VPOPCNTDQ: # %bb.0:
2693 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2694 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
2695 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
2696 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2697 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
2698 ; AVX512VPOPCNTDQ-NEXT: retq
2700 ; AVX512VPOPCNTDQVL-LABEL: ugt_5_v8i16:
2701 ; AVX512VPOPCNTDQVL: # %bb.0:
2702 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2703 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
2704 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
2705 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2706 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
2707 ; AVX512VPOPCNTDQVL-NEXT: retq
2709 ; BITALG_NOVLX-LABEL: ugt_5_v8i16:
2710 ; BITALG_NOVLX: # %bb.0:
2711 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2712 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
2713 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2714 ; BITALG_NOVLX-NEXT: vzeroupper
2715 ; BITALG_NOVLX-NEXT: retq
2717 ; BITALG-LABEL: ugt_5_v8i16:
2719 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
2720 ; BITALG-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; IR under test: per-lane population count, unsigned greater-than compare
; against a splat of 5, then sext of the <8 x i1> mask to <8 x i16>.
2722 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2723 %3 = icmp ugt <8 x i16> %2, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
2724 %4 = sext <8 x i1> %3 to <8 x i16>
; ult_6_v8i16: sign-extends the per-lane result of ctpop(%0) <u 6, so each
; i16 lane of the result is all-ones when the input lane has fewer than 6 bits
; set and zero otherwise.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
2728 define <8 x i16> @ult_6_v8i16(<8 x i16> %0) {
2729 ; SSE2-LABEL: ult_6_v8i16:
2731 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2732 ; SSE2-NEXT: psrlw $1, %xmm1
2733 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2734 ; SSE2-NEXT: psubb %xmm1, %xmm0
2735 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2736 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2737 ; SSE2-NEXT: pand %xmm1, %xmm2
2738 ; SSE2-NEXT: psrlw $2, %xmm0
2739 ; SSE2-NEXT: pand %xmm1, %xmm0
2740 ; SSE2-NEXT: paddb %xmm2, %xmm0
2741 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2742 ; SSE2-NEXT: psrlw $4, %xmm1
2743 ; SSE2-NEXT: paddb %xmm0, %xmm1
2744 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2745 ; SSE2-NEXT: movdqa %xmm1, %xmm2
2746 ; SSE2-NEXT: psllw $8, %xmm2
2747 ; SSE2-NEXT: paddb %xmm1, %xmm2
2748 ; SSE2-NEXT: psrlw $8, %xmm2
2749 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6]
2750 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
2753 ; SSE3-LABEL: ult_6_v8i16:
2755 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2756 ; SSE3-NEXT: psrlw $1, %xmm1
2757 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2758 ; SSE3-NEXT: psubb %xmm1, %xmm0
2759 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2760 ; SSE3-NEXT: movdqa %xmm0, %xmm2
2761 ; SSE3-NEXT: pand %xmm1, %xmm2
2762 ; SSE3-NEXT: psrlw $2, %xmm0
2763 ; SSE3-NEXT: pand %xmm1, %xmm0
2764 ; SSE3-NEXT: paddb %xmm2, %xmm0
2765 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2766 ; SSE3-NEXT: psrlw $4, %xmm1
2767 ; SSE3-NEXT: paddb %xmm0, %xmm1
2768 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2769 ; SSE3-NEXT: movdqa %xmm1, %xmm2
2770 ; SSE3-NEXT: psllw $8, %xmm2
2771 ; SSE3-NEXT: paddb %xmm1, %xmm2
2772 ; SSE3-NEXT: psrlw $8, %xmm2
2773 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6]
2774 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
2777 ; SSSE3-LABEL: ult_6_v8i16:
2779 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2780 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
2781 ; SSSE3-NEXT: pand %xmm1, %xmm2
2782 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2783 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
2784 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
2785 ; SSSE3-NEXT: psrlw $4, %xmm0
2786 ; SSSE3-NEXT: pand %xmm1, %xmm0
2787 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
2788 ; SSSE3-NEXT: paddb %xmm4, %xmm3
2789 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
2790 ; SSSE3-NEXT: psllw $8, %xmm1
2791 ; SSSE3-NEXT: paddb %xmm3, %xmm1
2792 ; SSSE3-NEXT: psrlw $8, %xmm1
2793 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6]
2794 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
2797 ; SSE41-LABEL: ult_6_v8i16:
2799 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2800 ; SSE41-NEXT: movdqa %xmm0, %xmm2
2801 ; SSE41-NEXT: pand %xmm1, %xmm2
2802 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2803 ; SSE41-NEXT: movdqa %xmm3, %xmm4
2804 ; SSE41-NEXT: pshufb %xmm2, %xmm4
2805 ; SSE41-NEXT: psrlw $4, %xmm0
2806 ; SSE41-NEXT: pand %xmm1, %xmm0
2807 ; SSE41-NEXT: pshufb %xmm0, %xmm3
2808 ; SSE41-NEXT: paddb %xmm4, %xmm3
2809 ; SSE41-NEXT: movdqa %xmm3, %xmm1
2810 ; SSE41-NEXT: psllw $8, %xmm1
2811 ; SSE41-NEXT: paddb %xmm3, %xmm1
2812 ; SSE41-NEXT: psrlw $8, %xmm1
2813 ; SSE41-NEXT: pmovsxbw {{.*#+}} xmm0 = [6,6,6,6,6,6,6,6]
2814 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
2817 ; AVX1-LABEL: ult_6_v8i16:
2819 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2820 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
2821 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2822 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2823 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
2824 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2825 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2826 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2827 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
2828 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2829 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
2830 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2831 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2834 ; AVX2-LABEL: ult_6_v8i16:
2836 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2837 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
2838 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2839 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2840 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
2841 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
2842 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2843 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2844 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
2845 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2846 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
2847 ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2848 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2851 ; AVX512VPOPCNTDQ-LABEL: ult_6_v8i16:
2852 ; AVX512VPOPCNTDQ: # %bb.0:
2853 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2854 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
2855 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
2856 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastw {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2857 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2858 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
2859 ; AVX512VPOPCNTDQ-NEXT: retq
2861 ; AVX512VPOPCNTDQVL-LABEL: ult_6_v8i16:
2862 ; AVX512VPOPCNTDQVL: # %bb.0:
2863 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2864 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
2865 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
2866 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2867 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2868 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
2869 ; AVX512VPOPCNTDQVL-NEXT: retq
2871 ; BITALG_NOVLX-LABEL: ult_6_v8i16:
2872 ; BITALG_NOVLX: # %bb.0:
2873 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
2874 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
2875 ; BITALG_NOVLX-NEXT: vpbroadcastw {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2876 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
2877 ; BITALG_NOVLX-NEXT: vzeroupper
2878 ; BITALG_NOVLX-NEXT: retq
2880 ; BITALG-LABEL: ult_6_v8i16:
2882 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
2883 ; BITALG-NEXT: vpbroadcastw {{.*#+}} xmm1 = [6,6,6,6,6,6,6,6]
2884 ; BITALG-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; IR under test: per-lane population count, unsigned less-than compare
; against a splat of 6, then sext of the <8 x i1> mask to <8 x i16>.
2886 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
2887 %3 = icmp ult <8 x i16> %2, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
2888 %4 = sext <8 x i1> %3 to <8 x i16>
; ugt_6_v8i16: sign-extends the per-lane result of ctpop(%0) >u 6, so each
; i16 lane of the result is all-ones when the input lane has more than 6 bits
; set and zero otherwise.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
2892 define <8 x i16> @ugt_6_v8i16(<8 x i16> %0) {
2893 ; SSE2-LABEL: ugt_6_v8i16:
2895 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2896 ; SSE2-NEXT: psrlw $1, %xmm1
2897 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2898 ; SSE2-NEXT: psubb %xmm1, %xmm0
2899 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2900 ; SSE2-NEXT: movdqa %xmm0, %xmm2
2901 ; SSE2-NEXT: pand %xmm1, %xmm2
2902 ; SSE2-NEXT: psrlw $2, %xmm0
2903 ; SSE2-NEXT: pand %xmm1, %xmm0
2904 ; SSE2-NEXT: paddb %xmm2, %xmm0
2905 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2906 ; SSE2-NEXT: psrlw $4, %xmm1
2907 ; SSE2-NEXT: paddb %xmm1, %xmm0
2908 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2909 ; SSE2-NEXT: movdqa %xmm0, %xmm1
2910 ; SSE2-NEXT: psllw $8, %xmm1
2911 ; SSE2-NEXT: paddb %xmm1, %xmm0
2912 ; SSE2-NEXT: psrlw $8, %xmm0
2913 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2916 ; SSE3-LABEL: ugt_6_v8i16:
2918 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2919 ; SSE3-NEXT: psrlw $1, %xmm1
2920 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2921 ; SSE3-NEXT: psubb %xmm1, %xmm0
2922 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
2923 ; SSE3-NEXT: movdqa %xmm0, %xmm2
2924 ; SSE3-NEXT: pand %xmm1, %xmm2
2925 ; SSE3-NEXT: psrlw $2, %xmm0
2926 ; SSE3-NEXT: pand %xmm1, %xmm0
2927 ; SSE3-NEXT: paddb %xmm2, %xmm0
2928 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2929 ; SSE3-NEXT: psrlw $4, %xmm1
2930 ; SSE3-NEXT: paddb %xmm1, %xmm0
2931 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2932 ; SSE3-NEXT: movdqa %xmm0, %xmm1
2933 ; SSE3-NEXT: psllw $8, %xmm1
2934 ; SSE3-NEXT: paddb %xmm1, %xmm0
2935 ; SSE3-NEXT: psrlw $8, %xmm0
2936 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2939 ; SSSE3-LABEL: ugt_6_v8i16:
2941 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2942 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
2943 ; SSSE3-NEXT: pand %xmm1, %xmm2
2944 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2945 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
2946 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
2947 ; SSSE3-NEXT: psrlw $4, %xmm0
2948 ; SSSE3-NEXT: pand %xmm1, %xmm0
2949 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
2950 ; SSSE3-NEXT: paddb %xmm4, %xmm3
2951 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
2952 ; SSSE3-NEXT: psllw $8, %xmm0
2953 ; SSSE3-NEXT: paddb %xmm3, %xmm0
2954 ; SSSE3-NEXT: psrlw $8, %xmm0
2955 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2958 ; SSE41-LABEL: ugt_6_v8i16:
2960 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2961 ; SSE41-NEXT: movdqa %xmm0, %xmm2
2962 ; SSE41-NEXT: pand %xmm1, %xmm2
2963 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2964 ; SSE41-NEXT: movdqa %xmm3, %xmm4
2965 ; SSE41-NEXT: pshufb %xmm2, %xmm4
2966 ; SSE41-NEXT: psrlw $4, %xmm0
2967 ; SSE41-NEXT: pand %xmm1, %xmm0
2968 ; SSE41-NEXT: pshufb %xmm0, %xmm3
2969 ; SSE41-NEXT: paddb %xmm4, %xmm3
2970 ; SSE41-NEXT: movdqa %xmm3, %xmm0
2971 ; SSE41-NEXT: psllw $8, %xmm0
2972 ; SSE41-NEXT: paddb %xmm3, %xmm0
2973 ; SSE41-NEXT: psrlw $8, %xmm0
2974 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2977 ; AVX1-LABEL: ugt_6_v8i16:
2979 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2980 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
2981 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2982 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2983 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
2984 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
2985 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
2986 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2987 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
2988 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
2989 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
2990 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2993 ; AVX2-LABEL: ugt_6_v8i16:
2995 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
2996 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
2997 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
2998 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
2999 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
3000 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
3001 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3002 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3003 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
3004 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3005 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
3006 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3009 ; AVX512VPOPCNTDQ-LABEL: ugt_6_v8i16:
3010 ; AVX512VPOPCNTDQ: # %bb.0:
3011 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3012 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
3013 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
3014 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3015 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
3016 ; AVX512VPOPCNTDQ-NEXT: retq
3018 ; AVX512VPOPCNTDQVL-LABEL: ugt_6_v8i16:
3019 ; AVX512VPOPCNTDQVL: # %bb.0:
3020 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3021 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
3022 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
3023 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3024 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
3025 ; AVX512VPOPCNTDQVL-NEXT: retq
3027 ; BITALG_NOVLX-LABEL: ugt_6_v8i16:
3028 ; BITALG_NOVLX: # %bb.0:
3029 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3030 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
3031 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3032 ; BITALG_NOVLX-NEXT: vzeroupper
3033 ; BITALG_NOVLX-NEXT: retq
3035 ; BITALG-LABEL: ugt_6_v8i16:
3037 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
3038 ; BITALG-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; IR under test: per-lane population count, unsigned greater-than compare
; against a splat of 6, then sext of the <8 x i1> mask to <8 x i16>.
3040 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3041 %3 = icmp ugt <8 x i16> %2, <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>
3042 %4 = sext <8 x i1> %3 to <8 x i16>
; ult_7_v8i16 - codegen test for ctpop(<8 x i16>) u< 7, with the per-lane
; result sign-extended to an i16 mask (all-ones when true, zero when false).
; The per-prefix assertion lines that follow are autogenerated (see the note
; at the top of this file); regenerate them with the update script instead of
; editing them by hand.
3046 define <8 x i16> @ult_7_v8i16(<8 x i16> %0) {
3047 ; SSE2-LABEL: ult_7_v8i16:
3049 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3050 ; SSE2-NEXT: psrlw $1, %xmm1
3051 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3052 ; SSE2-NEXT: psubb %xmm1, %xmm0
3053 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3054 ; SSE2-NEXT: movdqa %xmm0, %xmm2
3055 ; SSE2-NEXT: pand %xmm1, %xmm2
3056 ; SSE2-NEXT: psrlw $2, %xmm0
3057 ; SSE2-NEXT: pand %xmm1, %xmm0
3058 ; SSE2-NEXT: paddb %xmm2, %xmm0
3059 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3060 ; SSE2-NEXT: psrlw $4, %xmm1
3061 ; SSE2-NEXT: paddb %xmm0, %xmm1
3062 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3063 ; SSE2-NEXT: movdqa %xmm1, %xmm2
3064 ; SSE2-NEXT: psllw $8, %xmm2
3065 ; SSE2-NEXT: paddb %xmm1, %xmm2
3066 ; SSE2-NEXT: psrlw $8, %xmm2
3067 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7]
3068 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
3071 ; SSE3-LABEL: ult_7_v8i16:
3073 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3074 ; SSE3-NEXT: psrlw $1, %xmm1
3075 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3076 ; SSE3-NEXT: psubb %xmm1, %xmm0
3077 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3078 ; SSE3-NEXT: movdqa %xmm0, %xmm2
3079 ; SSE3-NEXT: pand %xmm1, %xmm2
3080 ; SSE3-NEXT: psrlw $2, %xmm0
3081 ; SSE3-NEXT: pand %xmm1, %xmm0
3082 ; SSE3-NEXT: paddb %xmm2, %xmm0
3083 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3084 ; SSE3-NEXT: psrlw $4, %xmm1
3085 ; SSE3-NEXT: paddb %xmm0, %xmm1
3086 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3087 ; SSE3-NEXT: movdqa %xmm1, %xmm2
3088 ; SSE3-NEXT: psllw $8, %xmm2
3089 ; SSE3-NEXT: paddb %xmm1, %xmm2
3090 ; SSE3-NEXT: psrlw $8, %xmm2
3091 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7]
3092 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
3095 ; SSSE3-LABEL: ult_7_v8i16:
3097 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3098 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
3099 ; SSSE3-NEXT: pand %xmm1, %xmm2
3100 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3101 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
3102 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
3103 ; SSSE3-NEXT: psrlw $4, %xmm0
3104 ; SSSE3-NEXT: pand %xmm1, %xmm0
3105 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
3106 ; SSSE3-NEXT: paddb %xmm4, %xmm3
3107 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
3108 ; SSSE3-NEXT: psllw $8, %xmm1
3109 ; SSSE3-NEXT: paddb %xmm3, %xmm1
3110 ; SSSE3-NEXT: psrlw $8, %xmm1
3111 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7]
3112 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
3115 ; SSE41-LABEL: ult_7_v8i16:
3117 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3118 ; SSE41-NEXT: movdqa %xmm0, %xmm2
3119 ; SSE41-NEXT: pand %xmm1, %xmm2
3120 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3121 ; SSE41-NEXT: movdqa %xmm3, %xmm4
3122 ; SSE41-NEXT: pshufb %xmm2, %xmm4
3123 ; SSE41-NEXT: psrlw $4, %xmm0
3124 ; SSE41-NEXT: pand %xmm1, %xmm0
3125 ; SSE41-NEXT: pshufb %xmm0, %xmm3
3126 ; SSE41-NEXT: paddb %xmm4, %xmm3
3127 ; SSE41-NEXT: movdqa %xmm3, %xmm1
3128 ; SSE41-NEXT: psllw $8, %xmm1
3129 ; SSE41-NEXT: paddb %xmm3, %xmm1
3130 ; SSE41-NEXT: psrlw $8, %xmm1
3131 ; SSE41-NEXT: pmovsxbw {{.*#+}} xmm0 = [7,7,7,7,7,7,7,7]
3132 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
3135 ; AVX1-LABEL: ult_7_v8i16:
3137 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3138 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
3139 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3140 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3141 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
3142 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3143 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3144 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3145 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
3146 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3147 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
3148 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3149 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3152 ; AVX2-LABEL: ult_7_v8i16:
3154 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3155 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
3156 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3157 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3158 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
3159 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
3160 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3161 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3162 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
3163 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3164 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
3165 ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3166 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3169 ; AVX512VPOPCNTDQ-LABEL: ult_7_v8i16:
3170 ; AVX512VPOPCNTDQ: # %bb.0:
3171 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3172 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
3173 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
3174 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastw {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3175 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3176 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
3177 ; AVX512VPOPCNTDQ-NEXT: retq
3179 ; AVX512VPOPCNTDQVL-LABEL: ult_7_v8i16:
3180 ; AVX512VPOPCNTDQVL: # %bb.0:
3181 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3182 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
3183 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
3184 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3185 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3186 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
3187 ; AVX512VPOPCNTDQVL-NEXT: retq
3189 ; BITALG_NOVLX-LABEL: ult_7_v8i16:
3190 ; BITALG_NOVLX: # %bb.0:
3191 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3192 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
3193 ; BITALG_NOVLX-NEXT: vpbroadcastw {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3194 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3195 ; BITALG_NOVLX-NEXT: vzeroupper
3196 ; BITALG_NOVLX-NEXT: retq
3198 ; BITALG-LABEL: ult_7_v8i16:
3200 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
3201 ; BITALG-NEXT: vpbroadcastw {{.*#+}} xmm1 = [7,7,7,7,7,7,7,7]
3202 ; BITALG-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; IR under test - per-lane population count, unsigned less-than compare
; against a splat of 7, then sext of the <8 x i1> result to <8 x i16>.
3204 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3205 %3 = icmp ult <8 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
3206 %4 = sext <8 x i1> %3 to <8 x i16>
; ugt_7_v8i16 - codegen test for ctpop(<8 x i16>) u> 7, with the per-lane
; result sign-extended to an i16 mask (all-ones when true, zero when false).
; The per-prefix assertion lines that follow are autogenerated (see the note
; at the top of this file); regenerate them with the update script instead of
; editing them by hand.
3210 define <8 x i16> @ugt_7_v8i16(<8 x i16> %0) {
3211 ; SSE2-LABEL: ugt_7_v8i16:
3213 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3214 ; SSE2-NEXT: psrlw $1, %xmm1
3215 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3216 ; SSE2-NEXT: psubb %xmm1, %xmm0
3217 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3218 ; SSE2-NEXT: movdqa %xmm0, %xmm2
3219 ; SSE2-NEXT: pand %xmm1, %xmm2
3220 ; SSE2-NEXT: psrlw $2, %xmm0
3221 ; SSE2-NEXT: pand %xmm1, %xmm0
3222 ; SSE2-NEXT: paddb %xmm2, %xmm0
3223 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3224 ; SSE2-NEXT: psrlw $4, %xmm1
3225 ; SSE2-NEXT: paddb %xmm1, %xmm0
3226 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3227 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3228 ; SSE2-NEXT: psllw $8, %xmm1
3229 ; SSE2-NEXT: paddb %xmm1, %xmm0
3230 ; SSE2-NEXT: psrlw $8, %xmm0
3231 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3234 ; SSE3-LABEL: ugt_7_v8i16:
3236 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3237 ; SSE3-NEXT: psrlw $1, %xmm1
3238 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3239 ; SSE3-NEXT: psubb %xmm1, %xmm0
3240 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3241 ; SSE3-NEXT: movdqa %xmm0, %xmm2
3242 ; SSE3-NEXT: pand %xmm1, %xmm2
3243 ; SSE3-NEXT: psrlw $2, %xmm0
3244 ; SSE3-NEXT: pand %xmm1, %xmm0
3245 ; SSE3-NEXT: paddb %xmm2, %xmm0
3246 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3247 ; SSE3-NEXT: psrlw $4, %xmm1
3248 ; SSE3-NEXT: paddb %xmm1, %xmm0
3249 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3250 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3251 ; SSE3-NEXT: psllw $8, %xmm1
3252 ; SSE3-NEXT: paddb %xmm1, %xmm0
3253 ; SSE3-NEXT: psrlw $8, %xmm0
3254 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3257 ; SSSE3-LABEL: ugt_7_v8i16:
3259 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3260 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
3261 ; SSSE3-NEXT: pand %xmm1, %xmm2
3262 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3263 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
3264 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
3265 ; SSSE3-NEXT: psrlw $4, %xmm0
3266 ; SSSE3-NEXT: pand %xmm1, %xmm0
3267 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
3268 ; SSSE3-NEXT: paddb %xmm4, %xmm3
3269 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
3270 ; SSSE3-NEXT: psllw $8, %xmm0
3271 ; SSSE3-NEXT: paddb %xmm3, %xmm0
3272 ; SSSE3-NEXT: psrlw $8, %xmm0
3273 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3276 ; SSE41-LABEL: ugt_7_v8i16:
3278 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3279 ; SSE41-NEXT: movdqa %xmm0, %xmm2
3280 ; SSE41-NEXT: pand %xmm1, %xmm2
3281 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3282 ; SSE41-NEXT: movdqa %xmm3, %xmm4
3283 ; SSE41-NEXT: pshufb %xmm2, %xmm4
3284 ; SSE41-NEXT: psrlw $4, %xmm0
3285 ; SSE41-NEXT: pand %xmm1, %xmm0
3286 ; SSE41-NEXT: pshufb %xmm0, %xmm3
3287 ; SSE41-NEXT: paddb %xmm4, %xmm3
3288 ; SSE41-NEXT: movdqa %xmm3, %xmm0
3289 ; SSE41-NEXT: psllw $8, %xmm0
3290 ; SSE41-NEXT: paddb %xmm3, %xmm0
3291 ; SSE41-NEXT: psrlw $8, %xmm0
3292 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3295 ; AVX1-LABEL: ugt_7_v8i16:
3297 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3298 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
3299 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3300 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3301 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
3302 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3303 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3304 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3305 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
3306 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3307 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
3308 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3311 ; AVX2-LABEL: ugt_7_v8i16:
3313 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3314 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
3315 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3316 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3317 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
3318 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
3319 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3320 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3321 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
3322 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3323 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
3324 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3327 ; AVX512VPOPCNTDQ-LABEL: ugt_7_v8i16:
3328 ; AVX512VPOPCNTDQ: # %bb.0:
3329 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3330 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
3331 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
3332 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3333 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
3334 ; AVX512VPOPCNTDQ-NEXT: retq
3336 ; AVX512VPOPCNTDQVL-LABEL: ugt_7_v8i16:
3337 ; AVX512VPOPCNTDQVL: # %bb.0:
3338 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3339 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
3340 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
3341 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3342 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
3343 ; AVX512VPOPCNTDQVL-NEXT: retq
3345 ; BITALG_NOVLX-LABEL: ugt_7_v8i16:
3346 ; BITALG_NOVLX: # %bb.0:
3347 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3348 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
3349 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3350 ; BITALG_NOVLX-NEXT: vzeroupper
3351 ; BITALG_NOVLX-NEXT: retq
3353 ; BITALG-LABEL: ugt_7_v8i16:
3355 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
3356 ; BITALG-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; IR under test - per-lane population count, unsigned greater-than compare
; against a splat of 7, then sext of the <8 x i1> result to <8 x i16>.
3358 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3359 %3 = icmp ugt <8 x i16> %2, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
3360 %4 = sext <8 x i1> %3 to <8 x i16>
; ult_8_v8i16 - codegen test for ctpop(<8 x i16>) u< 8, with the per-lane
; result sign-extended to an i16 mask (all-ones when true, zero when false).
; The per-prefix assertion lines that follow are autogenerated (see the note
; at the top of this file); regenerate them with the update script instead of
; editing them by hand.
3364 define <8 x i16> @ult_8_v8i16(<8 x i16> %0) {
3365 ; SSE2-LABEL: ult_8_v8i16:
3367 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3368 ; SSE2-NEXT: psrlw $1, %xmm1
3369 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3370 ; SSE2-NEXT: psubb %xmm1, %xmm0
3371 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3372 ; SSE2-NEXT: movdqa %xmm0, %xmm2
3373 ; SSE2-NEXT: pand %xmm1, %xmm2
3374 ; SSE2-NEXT: psrlw $2, %xmm0
3375 ; SSE2-NEXT: pand %xmm1, %xmm0
3376 ; SSE2-NEXT: paddb %xmm2, %xmm0
3377 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3378 ; SSE2-NEXT: psrlw $4, %xmm1
3379 ; SSE2-NEXT: paddb %xmm0, %xmm1
3380 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3381 ; SSE2-NEXT: movdqa %xmm1, %xmm2
3382 ; SSE2-NEXT: psllw $8, %xmm2
3383 ; SSE2-NEXT: paddb %xmm1, %xmm2
3384 ; SSE2-NEXT: psrlw $8, %xmm2
3385 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8]
3386 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
3389 ; SSE3-LABEL: ult_8_v8i16:
3391 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3392 ; SSE3-NEXT: psrlw $1, %xmm1
3393 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3394 ; SSE3-NEXT: psubb %xmm1, %xmm0
3395 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3396 ; SSE3-NEXT: movdqa %xmm0, %xmm2
3397 ; SSE3-NEXT: pand %xmm1, %xmm2
3398 ; SSE3-NEXT: psrlw $2, %xmm0
3399 ; SSE3-NEXT: pand %xmm1, %xmm0
3400 ; SSE3-NEXT: paddb %xmm2, %xmm0
3401 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3402 ; SSE3-NEXT: psrlw $4, %xmm1
3403 ; SSE3-NEXT: paddb %xmm0, %xmm1
3404 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3405 ; SSE3-NEXT: movdqa %xmm1, %xmm2
3406 ; SSE3-NEXT: psllw $8, %xmm2
3407 ; SSE3-NEXT: paddb %xmm1, %xmm2
3408 ; SSE3-NEXT: psrlw $8, %xmm2
3409 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8]
3410 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
3413 ; SSSE3-LABEL: ult_8_v8i16:
3415 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3416 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
3417 ; SSSE3-NEXT: pand %xmm1, %xmm2
3418 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3419 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
3420 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
3421 ; SSSE3-NEXT: psrlw $4, %xmm0
3422 ; SSSE3-NEXT: pand %xmm1, %xmm0
3423 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
3424 ; SSSE3-NEXT: paddb %xmm4, %xmm3
3425 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
3426 ; SSSE3-NEXT: psllw $8, %xmm1
3427 ; SSSE3-NEXT: paddb %xmm3, %xmm1
3428 ; SSSE3-NEXT: psrlw $8, %xmm1
3429 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8]
3430 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
3433 ; SSE41-LABEL: ult_8_v8i16:
3435 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3436 ; SSE41-NEXT: movdqa %xmm0, %xmm2
3437 ; SSE41-NEXT: pand %xmm1, %xmm2
3438 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3439 ; SSE41-NEXT: movdqa %xmm3, %xmm4
3440 ; SSE41-NEXT: pshufb %xmm2, %xmm4
3441 ; SSE41-NEXT: psrlw $4, %xmm0
3442 ; SSE41-NEXT: pand %xmm1, %xmm0
3443 ; SSE41-NEXT: pshufb %xmm0, %xmm3
3444 ; SSE41-NEXT: paddb %xmm4, %xmm3
3445 ; SSE41-NEXT: movdqa %xmm3, %xmm1
3446 ; SSE41-NEXT: psllw $8, %xmm1
3447 ; SSE41-NEXT: paddb %xmm3, %xmm1
3448 ; SSE41-NEXT: psrlw $8, %xmm1
3449 ; SSE41-NEXT: pmovsxbw {{.*#+}} xmm0 = [8,8,8,8,8,8,8,8]
3450 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
3453 ; AVX1-LABEL: ult_8_v8i16:
3455 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3456 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
3457 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3458 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3459 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
3460 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3461 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3462 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3463 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
3464 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3465 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
3466 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3467 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3470 ; AVX2-LABEL: ult_8_v8i16:
3472 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3473 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
3474 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3475 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3476 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
3477 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
3478 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3479 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3480 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
3481 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3482 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
3483 ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3484 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3487 ; AVX512VPOPCNTDQ-LABEL: ult_8_v8i16:
3488 ; AVX512VPOPCNTDQ: # %bb.0:
3489 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3490 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
3491 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
3492 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastw {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3493 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3494 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
3495 ; AVX512VPOPCNTDQ-NEXT: retq
3497 ; AVX512VPOPCNTDQVL-LABEL: ult_8_v8i16:
3498 ; AVX512VPOPCNTDQVL: # %bb.0:
3499 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3500 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
3501 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
3502 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3503 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3504 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
3505 ; AVX512VPOPCNTDQVL-NEXT: retq
3507 ; BITALG_NOVLX-LABEL: ult_8_v8i16:
3508 ; BITALG_NOVLX: # %bb.0:
3509 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3510 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
3511 ; BITALG_NOVLX-NEXT: vpbroadcastw {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3512 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3513 ; BITALG_NOVLX-NEXT: vzeroupper
3514 ; BITALG_NOVLX-NEXT: retq
3516 ; BITALG-LABEL: ult_8_v8i16:
3518 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
3519 ; BITALG-NEXT: vpbroadcastw {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8]
3520 ; BITALG-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; IR under test - per-lane population count, unsigned less-than compare
; against a splat of 8, then sext of the <8 x i1> result to <8 x i16>.
3522 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3523 %3 = icmp ult <8 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
3524 %4 = sext <8 x i1> %3 to <8 x i16>
; ugt_8_v8i16 - codegen test for ctpop(<8 x i16>) u> 8, with the per-lane
; result sign-extended to an i16 mask (all-ones when true, zero when false).
; The per-prefix assertion lines that follow are autogenerated (see the note
; at the top of this file); regenerate them with the update script instead of
; editing them by hand.
3528 define <8 x i16> @ugt_8_v8i16(<8 x i16> %0) {
3529 ; SSE2-LABEL: ugt_8_v8i16:
3531 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3532 ; SSE2-NEXT: psrlw $1, %xmm1
3533 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3534 ; SSE2-NEXT: psubb %xmm1, %xmm0
3535 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3536 ; SSE2-NEXT: movdqa %xmm0, %xmm2
3537 ; SSE2-NEXT: pand %xmm1, %xmm2
3538 ; SSE2-NEXT: psrlw $2, %xmm0
3539 ; SSE2-NEXT: pand %xmm1, %xmm0
3540 ; SSE2-NEXT: paddb %xmm2, %xmm0
3541 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3542 ; SSE2-NEXT: psrlw $4, %xmm1
3543 ; SSE2-NEXT: paddb %xmm1, %xmm0
3544 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3545 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3546 ; SSE2-NEXT: psllw $8, %xmm1
3547 ; SSE2-NEXT: paddb %xmm1, %xmm0
3548 ; SSE2-NEXT: psrlw $8, %xmm0
3549 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3552 ; SSE3-LABEL: ugt_8_v8i16:
3554 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3555 ; SSE3-NEXT: psrlw $1, %xmm1
3556 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3557 ; SSE3-NEXT: psubb %xmm1, %xmm0
3558 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3559 ; SSE3-NEXT: movdqa %xmm0, %xmm2
3560 ; SSE3-NEXT: pand %xmm1, %xmm2
3561 ; SSE3-NEXT: psrlw $2, %xmm0
3562 ; SSE3-NEXT: pand %xmm1, %xmm0
3563 ; SSE3-NEXT: paddb %xmm2, %xmm0
3564 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3565 ; SSE3-NEXT: psrlw $4, %xmm1
3566 ; SSE3-NEXT: paddb %xmm1, %xmm0
3567 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3568 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3569 ; SSE3-NEXT: psllw $8, %xmm1
3570 ; SSE3-NEXT: paddb %xmm1, %xmm0
3571 ; SSE3-NEXT: psrlw $8, %xmm0
3572 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3575 ; SSSE3-LABEL: ugt_8_v8i16:
3577 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3578 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
3579 ; SSSE3-NEXT: pand %xmm1, %xmm2
3580 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3581 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
3582 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
3583 ; SSSE3-NEXT: psrlw $4, %xmm0
3584 ; SSSE3-NEXT: pand %xmm1, %xmm0
3585 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
3586 ; SSSE3-NEXT: paddb %xmm4, %xmm3
3587 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
3588 ; SSSE3-NEXT: psllw $8, %xmm0
3589 ; SSSE3-NEXT: paddb %xmm3, %xmm0
3590 ; SSSE3-NEXT: psrlw $8, %xmm0
3591 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3594 ; SSE41-LABEL: ugt_8_v8i16:
3596 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3597 ; SSE41-NEXT: movdqa %xmm0, %xmm2
3598 ; SSE41-NEXT: pand %xmm1, %xmm2
3599 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3600 ; SSE41-NEXT: movdqa %xmm3, %xmm4
3601 ; SSE41-NEXT: pshufb %xmm2, %xmm4
3602 ; SSE41-NEXT: psrlw $4, %xmm0
3603 ; SSE41-NEXT: pand %xmm1, %xmm0
3604 ; SSE41-NEXT: pshufb %xmm0, %xmm3
3605 ; SSE41-NEXT: paddb %xmm4, %xmm3
3606 ; SSE41-NEXT: movdqa %xmm3, %xmm0
3607 ; SSE41-NEXT: psllw $8, %xmm0
3608 ; SSE41-NEXT: paddb %xmm3, %xmm0
3609 ; SSE41-NEXT: psrlw $8, %xmm0
3610 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3613 ; AVX1-LABEL: ugt_8_v8i16:
3615 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3616 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
3617 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3618 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3619 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
3620 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3621 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3622 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3623 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
3624 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3625 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
3626 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3629 ; AVX2-LABEL: ugt_8_v8i16:
3631 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3632 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
3633 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3634 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3635 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
3636 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
3637 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3638 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3639 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
3640 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3641 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
3642 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3645 ; AVX512VPOPCNTDQ-LABEL: ugt_8_v8i16:
3646 ; AVX512VPOPCNTDQ: # %bb.0:
3647 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3648 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
3649 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
3650 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3651 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
3652 ; AVX512VPOPCNTDQ-NEXT: retq
3654 ; AVX512VPOPCNTDQVL-LABEL: ugt_8_v8i16:
3655 ; AVX512VPOPCNTDQVL: # %bb.0:
3656 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3657 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
3658 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
3659 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3660 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
3661 ; AVX512VPOPCNTDQVL-NEXT: retq
3663 ; BITALG_NOVLX-LABEL: ugt_8_v8i16:
3664 ; BITALG_NOVLX: # %bb.0:
3665 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3666 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
3667 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3668 ; BITALG_NOVLX-NEXT: vzeroupper
3669 ; BITALG_NOVLX-NEXT: retq
3671 ; BITALG-LABEL: ugt_8_v8i16:
3673 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
3674 ; BITALG-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; IR under test - per-lane population count, unsigned greater-than compare
; against a splat of 8, then sext of the <8 x i1> result to <8 x i16>.
3676 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3677 %3 = icmp ugt <8 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
3678 %4 = sext <8 x i1> %3 to <8 x i16>
; ult_9_v8i16 - codegen test for ctpop(<8 x i16>) u< 9, with the per-lane
; result sign-extended to an i16 mask (all-ones when true, zero when false).
; The per-prefix assertion lines that follow are autogenerated (see the note
; at the top of this file); regenerate them with the update script instead of
; editing them by hand.
3682 define <8 x i16> @ult_9_v8i16(<8 x i16> %0) {
3683 ; SSE2-LABEL: ult_9_v8i16:
3685 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3686 ; SSE2-NEXT: psrlw $1, %xmm1
3687 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3688 ; SSE2-NEXT: psubb %xmm1, %xmm0
3689 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3690 ; SSE2-NEXT: movdqa %xmm0, %xmm2
3691 ; SSE2-NEXT: pand %xmm1, %xmm2
3692 ; SSE2-NEXT: psrlw $2, %xmm0
3693 ; SSE2-NEXT: pand %xmm1, %xmm0
3694 ; SSE2-NEXT: paddb %xmm2, %xmm0
3695 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3696 ; SSE2-NEXT: psrlw $4, %xmm1
3697 ; SSE2-NEXT: paddb %xmm0, %xmm1
3698 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3699 ; SSE2-NEXT: movdqa %xmm1, %xmm2
3700 ; SSE2-NEXT: psllw $8, %xmm2
3701 ; SSE2-NEXT: paddb %xmm1, %xmm2
3702 ; SSE2-NEXT: psrlw $8, %xmm2
3703 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9]
3704 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
3707 ; SSE3-LABEL: ult_9_v8i16:
3709 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3710 ; SSE3-NEXT: psrlw $1, %xmm1
3711 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3712 ; SSE3-NEXT: psubb %xmm1, %xmm0
3713 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3714 ; SSE3-NEXT: movdqa %xmm0, %xmm2
3715 ; SSE3-NEXT: pand %xmm1, %xmm2
3716 ; SSE3-NEXT: psrlw $2, %xmm0
3717 ; SSE3-NEXT: pand %xmm1, %xmm0
3718 ; SSE3-NEXT: paddb %xmm2, %xmm0
3719 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3720 ; SSE3-NEXT: psrlw $4, %xmm1
3721 ; SSE3-NEXT: paddb %xmm0, %xmm1
3722 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3723 ; SSE3-NEXT: movdqa %xmm1, %xmm2
3724 ; SSE3-NEXT: psllw $8, %xmm2
3725 ; SSE3-NEXT: paddb %xmm1, %xmm2
3726 ; SSE3-NEXT: psrlw $8, %xmm2
3727 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9]
3728 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
3731 ; SSSE3-LABEL: ult_9_v8i16:
3733 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3734 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
3735 ; SSSE3-NEXT: pand %xmm1, %xmm2
3736 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3737 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
3738 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
3739 ; SSSE3-NEXT: psrlw $4, %xmm0
3740 ; SSSE3-NEXT: pand %xmm1, %xmm0
3741 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
3742 ; SSSE3-NEXT: paddb %xmm4, %xmm3
3743 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
3744 ; SSSE3-NEXT: psllw $8, %xmm1
3745 ; SSSE3-NEXT: paddb %xmm3, %xmm1
3746 ; SSSE3-NEXT: psrlw $8, %xmm1
3747 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9]
3748 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
3751 ; SSE41-LABEL: ult_9_v8i16:
3753 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3754 ; SSE41-NEXT: movdqa %xmm0, %xmm2
3755 ; SSE41-NEXT: pand %xmm1, %xmm2
3756 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3757 ; SSE41-NEXT: movdqa %xmm3, %xmm4
3758 ; SSE41-NEXT: pshufb %xmm2, %xmm4
3759 ; SSE41-NEXT: psrlw $4, %xmm0
3760 ; SSE41-NEXT: pand %xmm1, %xmm0
3761 ; SSE41-NEXT: pshufb %xmm0, %xmm3
3762 ; SSE41-NEXT: paddb %xmm4, %xmm3
3763 ; SSE41-NEXT: movdqa %xmm3, %xmm1
3764 ; SSE41-NEXT: psllw $8, %xmm1
3765 ; SSE41-NEXT: paddb %xmm3, %xmm1
3766 ; SSE41-NEXT: psrlw $8, %xmm1
3767 ; SSE41-NEXT: pmovsxbw {{.*#+}} xmm0 = [9,9,9,9,9,9,9,9]
3768 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
3771 ; AVX1-LABEL: ult_9_v8i16:
3773 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3774 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
3775 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3776 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3777 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
3778 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3779 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3780 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3781 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
3782 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3783 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
3784 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
3785 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3788 ; AVX2-LABEL: ult_9_v8i16:
3790 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3791 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
3792 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3793 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3794 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
3795 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
3796 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3797 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3798 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
3799 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3800 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
3801 ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
3802 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3805 ; AVX512VPOPCNTDQ-LABEL: ult_9_v8i16:
3806 ; AVX512VPOPCNTDQ: # %bb.0:
3807 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3808 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
3809 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
3810 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastw {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
3811 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3812 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
3813 ; AVX512VPOPCNTDQ-NEXT: retq
3815 ; AVX512VPOPCNTDQVL-LABEL: ult_9_v8i16:
3816 ; AVX512VPOPCNTDQVL: # %bb.0:
3817 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3818 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
3819 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
3820 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
3821 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3822 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
3823 ; AVX512VPOPCNTDQVL-NEXT: retq
3825 ; BITALG_NOVLX-LABEL: ult_9_v8i16:
3826 ; BITALG_NOVLX: # %bb.0:
3827 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3828 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
3829 ; BITALG_NOVLX-NEXT: vpbroadcastw {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
3830 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
3831 ; BITALG_NOVLX-NEXT: vzeroupper
3832 ; BITALG_NOVLX-NEXT: retq
3834 ; BITALG-LABEL: ult_9_v8i16:
3836 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
3837 ; BITALG-NEXT: vpbroadcastw {{.*#+}} xmm1 = [9,9,9,9,9,9,9,9]
3838 ; BITALG-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; IR under test - per-lane population count, unsigned less-than compare
; against a splat of 9, then sext of the <8 x i1> result to <8 x i16>.
3840 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3841 %3 = icmp ult <8 x i16> %2, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
3842 %4 = sext <8 x i1> %3 to <8 x i16>
; ugt_9_v8i16 - per-lane test "popcount(x) > 9" on <8 x i16>.
; The IR takes llvm.ctpop.v8i16 of the argument, compares the result with a
; splat of 9 using icmp ugt, and sign-extends the <8 x i1> mask to <8 x i16>.
; Lowering recorded by the check lines below:
;   * SSE2 / SSE3 runs - shift/mask/add bit counting (constant-pool masks plus
;     the splat of 51), then bytes are summed into words via psllw/paddb/psrlw.
;   * SSSE3 / SSE41 / AVX1 / AVX2 runs - pshufb lookups of the low and high
;     nibbles in the table [0,1,1,2,...,3,4], followed by the same byte sum.
;   * AVX512VPOPCNTDQ runs - zero-extend to dwords, vpopcntd, truncate (vpmovdw).
;   * BITALG runs - a single vpopcntw.
; Every run ends with pcmpgtw of the counts against a constant-pool operand.
; pcmpgtw is a signed compare; a 16-bit popcount is at most 16, so it yields the
; same result as the unsigned predicate.
; The assertions are autogenerated (see the header of this file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
3846 define <8 x i16> @ugt_9_v8i16(<8 x i16> %0) {
3847 ; SSE2-LABEL: ugt_9_v8i16:
3849 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3850 ; SSE2-NEXT: psrlw $1, %xmm1
3851 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3852 ; SSE2-NEXT: psubb %xmm1, %xmm0
3853 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3854 ; SSE2-NEXT: movdqa %xmm0, %xmm2
3855 ; SSE2-NEXT: pand %xmm1, %xmm2
3856 ; SSE2-NEXT: psrlw $2, %xmm0
3857 ; SSE2-NEXT: pand %xmm1, %xmm0
3858 ; SSE2-NEXT: paddb %xmm2, %xmm0
3859 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3860 ; SSE2-NEXT: psrlw $4, %xmm1
3861 ; SSE2-NEXT: paddb %xmm1, %xmm0
3862 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3863 ; SSE2-NEXT: movdqa %xmm0, %xmm1
3864 ; SSE2-NEXT: psllw $8, %xmm1
3865 ; SSE2-NEXT: paddb %xmm1, %xmm0
3866 ; SSE2-NEXT: psrlw $8, %xmm0
3867 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3870 ; SSE3-LABEL: ugt_9_v8i16:
3872 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3873 ; SSE3-NEXT: psrlw $1, %xmm1
3874 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
3875 ; SSE3-NEXT: psubb %xmm1, %xmm0
3876 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
3877 ; SSE3-NEXT: movdqa %xmm0, %xmm2
3878 ; SSE3-NEXT: pand %xmm1, %xmm2
3879 ; SSE3-NEXT: psrlw $2, %xmm0
3880 ; SSE3-NEXT: pand %xmm1, %xmm0
3881 ; SSE3-NEXT: paddb %xmm2, %xmm0
3882 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3883 ; SSE3-NEXT: psrlw $4, %xmm1
3884 ; SSE3-NEXT: paddb %xmm1, %xmm0
3885 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3886 ; SSE3-NEXT: movdqa %xmm0, %xmm1
3887 ; SSE3-NEXT: psllw $8, %xmm1
3888 ; SSE3-NEXT: paddb %xmm1, %xmm0
3889 ; SSE3-NEXT: psrlw $8, %xmm0
3890 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3893 ; SSSE3-LABEL: ugt_9_v8i16:
3895 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3896 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
3897 ; SSSE3-NEXT: pand %xmm1, %xmm2
3898 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3899 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
3900 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
3901 ; SSSE3-NEXT: psrlw $4, %xmm0
3902 ; SSSE3-NEXT: pand %xmm1, %xmm0
3903 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
3904 ; SSSE3-NEXT: paddb %xmm4, %xmm3
3905 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
3906 ; SSSE3-NEXT: psllw $8, %xmm0
3907 ; SSSE3-NEXT: paddb %xmm3, %xmm0
3908 ; SSSE3-NEXT: psrlw $8, %xmm0
3909 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3912 ; SSE41-LABEL: ugt_9_v8i16:
3914 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3915 ; SSE41-NEXT: movdqa %xmm0, %xmm2
3916 ; SSE41-NEXT: pand %xmm1, %xmm2
3917 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3918 ; SSE41-NEXT: movdqa %xmm3, %xmm4
3919 ; SSE41-NEXT: pshufb %xmm2, %xmm4
3920 ; SSE41-NEXT: psrlw $4, %xmm0
3921 ; SSE41-NEXT: pand %xmm1, %xmm0
3922 ; SSE41-NEXT: pshufb %xmm0, %xmm3
3923 ; SSE41-NEXT: paddb %xmm4, %xmm3
3924 ; SSE41-NEXT: movdqa %xmm3, %xmm0
3925 ; SSE41-NEXT: psllw $8, %xmm0
3926 ; SSE41-NEXT: paddb %xmm3, %xmm0
3927 ; SSE41-NEXT: psrlw $8, %xmm0
3928 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3931 ; AVX1-LABEL: ugt_9_v8i16:
3933 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3934 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
3935 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3936 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3937 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
3938 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
3939 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3940 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3941 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
3942 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3943 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
3944 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3947 ; AVX2-LABEL: ugt_9_v8i16:
3949 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
3950 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
3951 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
3952 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
3953 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
3954 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
3955 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
3956 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3957 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
3958 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
3959 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
3960 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3963 ; AVX512VPOPCNTDQ-LABEL: ugt_9_v8i16:
3964 ; AVX512VPOPCNTDQ: # %bb.0:
3965 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3966 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
3967 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
3968 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3969 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
3970 ; AVX512VPOPCNTDQ-NEXT: retq
3972 ; AVX512VPOPCNTDQVL-LABEL: ugt_9_v8i16:
3973 ; AVX512VPOPCNTDQVL: # %bb.0:
3974 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
3975 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
3976 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
3977 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3978 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
3979 ; AVX512VPOPCNTDQVL-NEXT: retq
3981 ; BITALG_NOVLX-LABEL: ugt_9_v8i16:
3982 ; BITALG_NOVLX: # %bb.0:
3983 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
3984 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
3985 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3986 ; BITALG_NOVLX-NEXT: vzeroupper
3987 ; BITALG_NOVLX-NEXT: retq
3989 ; BITALG-LABEL: ugt_9_v8i16:
3991 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
3992 ; BITALG-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
3994 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
3995 %3 = icmp ugt <8 x i16> %2, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
3996 %4 = sext <8 x i1> %3 to <8 x i16>
; ult_10_v8i16 - per-lane test "popcount(x) < 10" on <8 x i16>.
; The IR takes llvm.ctpop.v8i16 of the argument, compares the result with a
; splat of 10 using icmp ult, and sign-extends the <8 x i1> mask to <8 x i16>.
; Lowering recorded by the check lines below:
;   * SSE2 / SSE3 runs - shift/mask/add bit counting (constant-pool masks plus
;     the splat of 51), then bytes are summed into words via psllw/paddb/psrlw.
;   * SSSE3 / SSE41 / AVX1 / AVX2 runs - pshufb lookups of the low and high
;     nibbles in the table [0,1,1,2,...,3,4], followed by the same byte sum.
;   * AVX512VPOPCNTDQ runs - zero-extend to dwords, vpopcntd, truncate (vpmovdw).
;   * BITALG runs - a single vpopcntw.
; Every run then loads the splat of 10 into a register and uses pcmpgtw with
; the operands ordered so the result is "10 > count". The instruction used to
; materialize the splat differs per run (movdqa, pmovsxbw, vbroadcastss,
; vpbroadcastw, vpbroadcastd).
; The assertions are autogenerated (see the header of this file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
4000 define <8 x i16> @ult_10_v8i16(<8 x i16> %0) {
4001 ; SSE2-LABEL: ult_10_v8i16:
4003 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4004 ; SSE2-NEXT: psrlw $1, %xmm1
4005 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4006 ; SSE2-NEXT: psubb %xmm1, %xmm0
4007 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4008 ; SSE2-NEXT: movdqa %xmm0, %xmm2
4009 ; SSE2-NEXT: pand %xmm1, %xmm2
4010 ; SSE2-NEXT: psrlw $2, %xmm0
4011 ; SSE2-NEXT: pand %xmm1, %xmm0
4012 ; SSE2-NEXT: paddb %xmm2, %xmm0
4013 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4014 ; SSE2-NEXT: psrlw $4, %xmm1
4015 ; SSE2-NEXT: paddb %xmm0, %xmm1
4016 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4017 ; SSE2-NEXT: movdqa %xmm1, %xmm2
4018 ; SSE2-NEXT: psllw $8, %xmm2
4019 ; SSE2-NEXT: paddb %xmm1, %xmm2
4020 ; SSE2-NEXT: psrlw $8, %xmm2
4021 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10]
4022 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
4025 ; SSE3-LABEL: ult_10_v8i16:
4027 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4028 ; SSE3-NEXT: psrlw $1, %xmm1
4029 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4030 ; SSE3-NEXT: psubb %xmm1, %xmm0
4031 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4032 ; SSE3-NEXT: movdqa %xmm0, %xmm2
4033 ; SSE3-NEXT: pand %xmm1, %xmm2
4034 ; SSE3-NEXT: psrlw $2, %xmm0
4035 ; SSE3-NEXT: pand %xmm1, %xmm0
4036 ; SSE3-NEXT: paddb %xmm2, %xmm0
4037 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4038 ; SSE3-NEXT: psrlw $4, %xmm1
4039 ; SSE3-NEXT: paddb %xmm0, %xmm1
4040 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4041 ; SSE3-NEXT: movdqa %xmm1, %xmm2
4042 ; SSE3-NEXT: psllw $8, %xmm2
4043 ; SSE3-NEXT: paddb %xmm1, %xmm2
4044 ; SSE3-NEXT: psrlw $8, %xmm2
4045 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10]
4046 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
4049 ; SSSE3-LABEL: ult_10_v8i16:
4051 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4052 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
4053 ; SSSE3-NEXT: pand %xmm1, %xmm2
4054 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4055 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
4056 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
4057 ; SSSE3-NEXT: psrlw $4, %xmm0
4058 ; SSSE3-NEXT: pand %xmm1, %xmm0
4059 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
4060 ; SSSE3-NEXT: paddb %xmm4, %xmm3
4061 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
4062 ; SSSE3-NEXT: psllw $8, %xmm1
4063 ; SSSE3-NEXT: paddb %xmm3, %xmm1
4064 ; SSSE3-NEXT: psrlw $8, %xmm1
4065 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10]
4066 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
4069 ; SSE41-LABEL: ult_10_v8i16:
4071 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4072 ; SSE41-NEXT: movdqa %xmm0, %xmm2
4073 ; SSE41-NEXT: pand %xmm1, %xmm2
4074 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4075 ; SSE41-NEXT: movdqa %xmm3, %xmm4
4076 ; SSE41-NEXT: pshufb %xmm2, %xmm4
4077 ; SSE41-NEXT: psrlw $4, %xmm0
4078 ; SSE41-NEXT: pand %xmm1, %xmm0
4079 ; SSE41-NEXT: pshufb %xmm0, %xmm3
4080 ; SSE41-NEXT: paddb %xmm4, %xmm3
4081 ; SSE41-NEXT: movdqa %xmm3, %xmm1
4082 ; SSE41-NEXT: psllw $8, %xmm1
4083 ; SSE41-NEXT: paddb %xmm3, %xmm1
4084 ; SSE41-NEXT: psrlw $8, %xmm1
4085 ; SSE41-NEXT: pmovsxbw {{.*#+}} xmm0 = [10,10,10,10,10,10,10,10]
4086 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
4089 ; AVX1-LABEL: ult_10_v8i16:
4091 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4092 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
4093 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4094 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4095 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
4096 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
4097 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4098 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4099 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
4100 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4101 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
4102 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
4103 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4106 ; AVX2-LABEL: ult_10_v8i16:
4108 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4109 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
4110 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4111 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4112 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
4113 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
4114 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4115 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4116 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
4117 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4118 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
4119 ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
4120 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4123 ; AVX512VPOPCNTDQ-LABEL: ult_10_v8i16:
4124 ; AVX512VPOPCNTDQ: # %bb.0:
4125 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4126 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
4127 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
4128 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastw {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
4129 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4130 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
4131 ; AVX512VPOPCNTDQ-NEXT: retq
4133 ; AVX512VPOPCNTDQVL-LABEL: ult_10_v8i16:
4134 ; AVX512VPOPCNTDQVL: # %bb.0:
4135 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4136 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
4137 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
4138 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
4139 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4140 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
4141 ; AVX512VPOPCNTDQVL-NEXT: retq
4143 ; BITALG_NOVLX-LABEL: ult_10_v8i16:
4144 ; BITALG_NOVLX: # %bb.0:
4145 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4146 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
4147 ; BITALG_NOVLX-NEXT: vpbroadcastw {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
4148 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4149 ; BITALG_NOVLX-NEXT: vzeroupper
4150 ; BITALG_NOVLX-NEXT: retq
4152 ; BITALG-LABEL: ult_10_v8i16:
4154 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
4155 ; BITALG-NEXT: vpbroadcastw {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10]
4156 ; BITALG-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4158 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
4159 %3 = icmp ult <8 x i16> %2, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
4160 %4 = sext <8 x i1> %3 to <8 x i16>
; ugt_10_v8i16 - per-lane test "popcount(x) > 10" on <8 x i16>.
; The IR takes llvm.ctpop.v8i16 of the argument, compares the result with a
; splat of 10 using icmp ugt, and sign-extends the <8 x i1> mask to <8 x i16>.
; Lowering recorded by the check lines below:
;   * SSE2 / SSE3 runs - shift/mask/add bit counting (constant-pool masks plus
;     the splat of 51), then bytes are summed into words via psllw/paddb/psrlw.
;   * SSSE3 / SSE41 / AVX1 / AVX2 runs - pshufb lookups of the low and high
;     nibbles in the table [0,1,1,2,...,3,4], followed by the same byte sum.
;   * AVX512VPOPCNTDQ runs - zero-extend to dwords, vpopcntd, truncate (vpmovdw).
;   * BITALG runs - a single vpopcntw.
; Every run ends with pcmpgtw of the counts against a constant-pool operand.
; pcmpgtw is a signed compare; a 16-bit popcount is at most 16, so it yields the
; same result as the unsigned predicate.
; The assertions are autogenerated (see the header of this file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
4164 define <8 x i16> @ugt_10_v8i16(<8 x i16> %0) {
4165 ; SSE2-LABEL: ugt_10_v8i16:
4167 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4168 ; SSE2-NEXT: psrlw $1, %xmm1
4169 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4170 ; SSE2-NEXT: psubb %xmm1, %xmm0
4171 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4172 ; SSE2-NEXT: movdqa %xmm0, %xmm2
4173 ; SSE2-NEXT: pand %xmm1, %xmm2
4174 ; SSE2-NEXT: psrlw $2, %xmm0
4175 ; SSE2-NEXT: pand %xmm1, %xmm0
4176 ; SSE2-NEXT: paddb %xmm2, %xmm0
4177 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4178 ; SSE2-NEXT: psrlw $4, %xmm1
4179 ; SSE2-NEXT: paddb %xmm1, %xmm0
4180 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4181 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4182 ; SSE2-NEXT: psllw $8, %xmm1
4183 ; SSE2-NEXT: paddb %xmm1, %xmm0
4184 ; SSE2-NEXT: psrlw $8, %xmm0
4185 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4188 ; SSE3-LABEL: ugt_10_v8i16:
4190 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4191 ; SSE3-NEXT: psrlw $1, %xmm1
4192 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4193 ; SSE3-NEXT: psubb %xmm1, %xmm0
4194 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4195 ; SSE3-NEXT: movdqa %xmm0, %xmm2
4196 ; SSE3-NEXT: pand %xmm1, %xmm2
4197 ; SSE3-NEXT: psrlw $2, %xmm0
4198 ; SSE3-NEXT: pand %xmm1, %xmm0
4199 ; SSE3-NEXT: paddb %xmm2, %xmm0
4200 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4201 ; SSE3-NEXT: psrlw $4, %xmm1
4202 ; SSE3-NEXT: paddb %xmm1, %xmm0
4203 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4204 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4205 ; SSE3-NEXT: psllw $8, %xmm1
4206 ; SSE3-NEXT: paddb %xmm1, %xmm0
4207 ; SSE3-NEXT: psrlw $8, %xmm0
4208 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4211 ; SSSE3-LABEL: ugt_10_v8i16:
4213 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4214 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
4215 ; SSSE3-NEXT: pand %xmm1, %xmm2
4216 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4217 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
4218 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
4219 ; SSSE3-NEXT: psrlw $4, %xmm0
4220 ; SSSE3-NEXT: pand %xmm1, %xmm0
4221 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
4222 ; SSSE3-NEXT: paddb %xmm4, %xmm3
4223 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
4224 ; SSSE3-NEXT: psllw $8, %xmm0
4225 ; SSSE3-NEXT: paddb %xmm3, %xmm0
4226 ; SSSE3-NEXT: psrlw $8, %xmm0
4227 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4230 ; SSE41-LABEL: ugt_10_v8i16:
4232 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4233 ; SSE41-NEXT: movdqa %xmm0, %xmm2
4234 ; SSE41-NEXT: pand %xmm1, %xmm2
4235 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4236 ; SSE41-NEXT: movdqa %xmm3, %xmm4
4237 ; SSE41-NEXT: pshufb %xmm2, %xmm4
4238 ; SSE41-NEXT: psrlw $4, %xmm0
4239 ; SSE41-NEXT: pand %xmm1, %xmm0
4240 ; SSE41-NEXT: pshufb %xmm0, %xmm3
4241 ; SSE41-NEXT: paddb %xmm4, %xmm3
4242 ; SSE41-NEXT: movdqa %xmm3, %xmm0
4243 ; SSE41-NEXT: psllw $8, %xmm0
4244 ; SSE41-NEXT: paddb %xmm3, %xmm0
4245 ; SSE41-NEXT: psrlw $8, %xmm0
4246 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4249 ; AVX1-LABEL: ugt_10_v8i16:
4251 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4252 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
4253 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4254 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4255 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
4256 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
4257 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4258 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4259 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
4260 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4261 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
4262 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4265 ; AVX2-LABEL: ugt_10_v8i16:
4267 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4268 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
4269 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4270 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4271 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
4272 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
4273 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4274 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4275 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
4276 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4277 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
4278 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4281 ; AVX512VPOPCNTDQ-LABEL: ugt_10_v8i16:
4282 ; AVX512VPOPCNTDQ: # %bb.0:
4283 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4284 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
4285 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
4286 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4287 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
4288 ; AVX512VPOPCNTDQ-NEXT: retq
4290 ; AVX512VPOPCNTDQVL-LABEL: ugt_10_v8i16:
4291 ; AVX512VPOPCNTDQVL: # %bb.0:
4292 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4293 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
4294 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
4295 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4296 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
4297 ; AVX512VPOPCNTDQVL-NEXT: retq
4299 ; BITALG_NOVLX-LABEL: ugt_10_v8i16:
4300 ; BITALG_NOVLX: # %bb.0:
4301 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4302 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
4303 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4304 ; BITALG_NOVLX-NEXT: vzeroupper
4305 ; BITALG_NOVLX-NEXT: retq
4307 ; BITALG-LABEL: ugt_10_v8i16:
4309 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
4310 ; BITALG-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4312 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
4313 %3 = icmp ugt <8 x i16> %2, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
4314 %4 = sext <8 x i1> %3 to <8 x i16>
; ult_11_v8i16 - per-lane test "popcount(x) < 11" on <8 x i16>.
; The IR takes llvm.ctpop.v8i16 of the argument, compares the result with a
; splat of 11 using icmp ult, and sign-extends the <8 x i1> mask to <8 x i16>.
; Lowering recorded by the check lines below:
;   * SSE2 / SSE3 runs - shift/mask/add bit counting (constant-pool masks plus
;     the splat of 51), then bytes are summed into words via psllw/paddb/psrlw.
;   * SSSE3 / SSE41 / AVX1 / AVX2 runs - pshufb lookups of the low and high
;     nibbles in the table [0,1,1,2,...,3,4], followed by the same byte sum.
;   * AVX512VPOPCNTDQ runs - zero-extend to dwords, vpopcntd, truncate (vpmovdw).
;   * BITALG runs - a single vpopcntw.
; Every run then loads the splat of 11 into a register and uses pcmpgtw with
; the operands ordered so the result is "11 > count". The instruction used to
; materialize the splat differs per run (movdqa, pmovsxbw, vbroadcastss,
; vpbroadcastw, vpbroadcastd).
; The assertions are autogenerated (see the header of this file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
4318 define <8 x i16> @ult_11_v8i16(<8 x i16> %0) {
4319 ; SSE2-LABEL: ult_11_v8i16:
4321 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4322 ; SSE2-NEXT: psrlw $1, %xmm1
4323 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4324 ; SSE2-NEXT: psubb %xmm1, %xmm0
4325 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4326 ; SSE2-NEXT: movdqa %xmm0, %xmm2
4327 ; SSE2-NEXT: pand %xmm1, %xmm2
4328 ; SSE2-NEXT: psrlw $2, %xmm0
4329 ; SSE2-NEXT: pand %xmm1, %xmm0
4330 ; SSE2-NEXT: paddb %xmm2, %xmm0
4331 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4332 ; SSE2-NEXT: psrlw $4, %xmm1
4333 ; SSE2-NEXT: paddb %xmm0, %xmm1
4334 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4335 ; SSE2-NEXT: movdqa %xmm1, %xmm2
4336 ; SSE2-NEXT: psllw $8, %xmm2
4337 ; SSE2-NEXT: paddb %xmm1, %xmm2
4338 ; SSE2-NEXT: psrlw $8, %xmm2
4339 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11]
4340 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
4343 ; SSE3-LABEL: ult_11_v8i16:
4345 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4346 ; SSE3-NEXT: psrlw $1, %xmm1
4347 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4348 ; SSE3-NEXT: psubb %xmm1, %xmm0
4349 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4350 ; SSE3-NEXT: movdqa %xmm0, %xmm2
4351 ; SSE3-NEXT: pand %xmm1, %xmm2
4352 ; SSE3-NEXT: psrlw $2, %xmm0
4353 ; SSE3-NEXT: pand %xmm1, %xmm0
4354 ; SSE3-NEXT: paddb %xmm2, %xmm0
4355 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4356 ; SSE3-NEXT: psrlw $4, %xmm1
4357 ; SSE3-NEXT: paddb %xmm0, %xmm1
4358 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4359 ; SSE3-NEXT: movdqa %xmm1, %xmm2
4360 ; SSE3-NEXT: psllw $8, %xmm2
4361 ; SSE3-NEXT: paddb %xmm1, %xmm2
4362 ; SSE3-NEXT: psrlw $8, %xmm2
4363 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11]
4364 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
4367 ; SSSE3-LABEL: ult_11_v8i16:
4369 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4370 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
4371 ; SSSE3-NEXT: pand %xmm1, %xmm2
4372 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4373 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
4374 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
4375 ; SSSE3-NEXT: psrlw $4, %xmm0
4376 ; SSSE3-NEXT: pand %xmm1, %xmm0
4377 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
4378 ; SSSE3-NEXT: paddb %xmm4, %xmm3
4379 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
4380 ; SSSE3-NEXT: psllw $8, %xmm1
4381 ; SSSE3-NEXT: paddb %xmm3, %xmm1
4382 ; SSSE3-NEXT: psrlw $8, %xmm1
4383 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11]
4384 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
4387 ; SSE41-LABEL: ult_11_v8i16:
4389 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4390 ; SSE41-NEXT: movdqa %xmm0, %xmm2
4391 ; SSE41-NEXT: pand %xmm1, %xmm2
4392 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4393 ; SSE41-NEXT: movdqa %xmm3, %xmm4
4394 ; SSE41-NEXT: pshufb %xmm2, %xmm4
4395 ; SSE41-NEXT: psrlw $4, %xmm0
4396 ; SSE41-NEXT: pand %xmm1, %xmm0
4397 ; SSE41-NEXT: pshufb %xmm0, %xmm3
4398 ; SSE41-NEXT: paddb %xmm4, %xmm3
4399 ; SSE41-NEXT: movdqa %xmm3, %xmm1
4400 ; SSE41-NEXT: psllw $8, %xmm1
4401 ; SSE41-NEXT: paddb %xmm3, %xmm1
4402 ; SSE41-NEXT: psrlw $8, %xmm1
4403 ; SSE41-NEXT: pmovsxbw {{.*#+}} xmm0 = [11,11,11,11,11,11,11,11]
4404 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
4407 ; AVX1-LABEL: ult_11_v8i16:
4409 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4410 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
4411 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4412 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4413 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
4414 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
4415 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4416 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4417 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
4418 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4419 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
4420 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4421 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4424 ; AVX2-LABEL: ult_11_v8i16:
4426 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4427 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
4428 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4429 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4430 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
4431 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
4432 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4433 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4434 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
4435 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4436 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
4437 ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4438 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4441 ; AVX512VPOPCNTDQ-LABEL: ult_11_v8i16:
4442 ; AVX512VPOPCNTDQ: # %bb.0:
4443 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4444 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
4445 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
4446 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastw {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4447 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4448 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
4449 ; AVX512VPOPCNTDQ-NEXT: retq
4451 ; AVX512VPOPCNTDQVL-LABEL: ult_11_v8i16:
4452 ; AVX512VPOPCNTDQVL: # %bb.0:
4453 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4454 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
4455 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
4456 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4457 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4458 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
4459 ; AVX512VPOPCNTDQVL-NEXT: retq
4461 ; BITALG_NOVLX-LABEL: ult_11_v8i16:
4462 ; BITALG_NOVLX: # %bb.0:
4463 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4464 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
4465 ; BITALG_NOVLX-NEXT: vpbroadcastw {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4466 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4467 ; BITALG_NOVLX-NEXT: vzeroupper
4468 ; BITALG_NOVLX-NEXT: retq
4470 ; BITALG-LABEL: ult_11_v8i16:
4472 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
4473 ; BITALG-NEXT: vpbroadcastw {{.*#+}} xmm1 = [11,11,11,11,11,11,11,11]
4474 ; BITALG-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4476 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
4477 %3 = icmp ult <8 x i16> %2, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
4478 %4 = sext <8 x i1> %3 to <8 x i16>
; ugt_11_v8i16 - per-lane test "popcount(x) > 11" on <8 x i16>.
; The IR takes llvm.ctpop.v8i16 of the argument, compares the result with a
; splat of 11 using icmp ugt, and sign-extends the <8 x i1> mask to <8 x i16>.
; Lowering recorded by the check lines below:
;   * SSE2 / SSE3 runs - shift/mask/add bit counting (constant-pool masks plus
;     the splat of 51), then bytes are summed into words via psllw/paddb/psrlw.
;   * SSSE3 / SSE41 / AVX1 / AVX2 runs - pshufb lookups of the low and high
;     nibbles in the table [0,1,1,2,...,3,4], followed by the same byte sum.
;   * AVX512VPOPCNTDQ runs - zero-extend to dwords, vpopcntd, truncate (vpmovdw).
;   * BITALG runs - a single vpopcntw.
; Every run ends with pcmpgtw of the counts against a constant-pool operand.
; pcmpgtw is a signed compare; a 16-bit popcount is at most 16, so it yields the
; same result as the unsigned predicate.
; The assertions are autogenerated (see the header of this file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
4482 define <8 x i16> @ugt_11_v8i16(<8 x i16> %0) {
4483 ; SSE2-LABEL: ugt_11_v8i16:
4485 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4486 ; SSE2-NEXT: psrlw $1, %xmm1
4487 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4488 ; SSE2-NEXT: psubb %xmm1, %xmm0
4489 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4490 ; SSE2-NEXT: movdqa %xmm0, %xmm2
4491 ; SSE2-NEXT: pand %xmm1, %xmm2
4492 ; SSE2-NEXT: psrlw $2, %xmm0
4493 ; SSE2-NEXT: pand %xmm1, %xmm0
4494 ; SSE2-NEXT: paddb %xmm2, %xmm0
4495 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4496 ; SSE2-NEXT: psrlw $4, %xmm1
4497 ; SSE2-NEXT: paddb %xmm1, %xmm0
4498 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4499 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4500 ; SSE2-NEXT: psllw $8, %xmm1
4501 ; SSE2-NEXT: paddb %xmm1, %xmm0
4502 ; SSE2-NEXT: psrlw $8, %xmm0
4503 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4506 ; SSE3-LABEL: ugt_11_v8i16:
4508 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4509 ; SSE3-NEXT: psrlw $1, %xmm1
4510 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4511 ; SSE3-NEXT: psubb %xmm1, %xmm0
4512 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4513 ; SSE3-NEXT: movdqa %xmm0, %xmm2
4514 ; SSE3-NEXT: pand %xmm1, %xmm2
4515 ; SSE3-NEXT: psrlw $2, %xmm0
4516 ; SSE3-NEXT: pand %xmm1, %xmm0
4517 ; SSE3-NEXT: paddb %xmm2, %xmm0
4518 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4519 ; SSE3-NEXT: psrlw $4, %xmm1
4520 ; SSE3-NEXT: paddb %xmm1, %xmm0
4521 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4522 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4523 ; SSE3-NEXT: psllw $8, %xmm1
4524 ; SSE3-NEXT: paddb %xmm1, %xmm0
4525 ; SSE3-NEXT: psrlw $8, %xmm0
4526 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4529 ; SSSE3-LABEL: ugt_11_v8i16:
4531 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4532 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
4533 ; SSSE3-NEXT: pand %xmm1, %xmm2
4534 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4535 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
4536 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
4537 ; SSSE3-NEXT: psrlw $4, %xmm0
4538 ; SSSE3-NEXT: pand %xmm1, %xmm0
4539 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
4540 ; SSSE3-NEXT: paddb %xmm4, %xmm3
4541 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
4542 ; SSSE3-NEXT: psllw $8, %xmm0
4543 ; SSSE3-NEXT: paddb %xmm3, %xmm0
4544 ; SSSE3-NEXT: psrlw $8, %xmm0
4545 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4548 ; SSE41-LABEL: ugt_11_v8i16:
4550 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4551 ; SSE41-NEXT: movdqa %xmm0, %xmm2
4552 ; SSE41-NEXT: pand %xmm1, %xmm2
4553 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4554 ; SSE41-NEXT: movdqa %xmm3, %xmm4
4555 ; SSE41-NEXT: pshufb %xmm2, %xmm4
4556 ; SSE41-NEXT: psrlw $4, %xmm0
4557 ; SSE41-NEXT: pand %xmm1, %xmm0
4558 ; SSE41-NEXT: pshufb %xmm0, %xmm3
4559 ; SSE41-NEXT: paddb %xmm4, %xmm3
4560 ; SSE41-NEXT: movdqa %xmm3, %xmm0
4561 ; SSE41-NEXT: psllw $8, %xmm0
4562 ; SSE41-NEXT: paddb %xmm3, %xmm0
4563 ; SSE41-NEXT: psrlw $8, %xmm0
4564 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4567 ; AVX1-LABEL: ugt_11_v8i16:
4569 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4570 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
4571 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4572 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4573 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
4574 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
4575 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4576 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4577 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
4578 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4579 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
4580 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4583 ; AVX2-LABEL: ugt_11_v8i16:
4585 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4586 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
4587 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4588 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4589 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
4590 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
4591 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4592 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4593 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
4594 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4595 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
4596 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4599 ; AVX512VPOPCNTDQ-LABEL: ugt_11_v8i16:
4600 ; AVX512VPOPCNTDQ: # %bb.0:
4601 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4602 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
4603 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
4604 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4605 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
4606 ; AVX512VPOPCNTDQ-NEXT: retq
4608 ; AVX512VPOPCNTDQVL-LABEL: ugt_11_v8i16:
4609 ; AVX512VPOPCNTDQVL: # %bb.0:
4610 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4611 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
4612 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
4613 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4614 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
4615 ; AVX512VPOPCNTDQVL-NEXT: retq
4617 ; BITALG_NOVLX-LABEL: ugt_11_v8i16:
4618 ; BITALG_NOVLX: # %bb.0:
4619 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4620 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
4621 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4622 ; BITALG_NOVLX-NEXT: vzeroupper
4623 ; BITALG_NOVLX-NEXT: retq
4625 ; BITALG-LABEL: ugt_11_v8i16:
4627 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
4628 ; BITALG-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4630 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
4631 %3 = icmp ugt <8 x i16> %2, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
4632 %4 = sext <8 x i1> %3 to <8 x i16>
; ult_12_v8i16: takes the per-lane population count of the <8 x i16> argument
; via @llvm.ctpop.v8i16, tests each count with an unsigned less-than against 12,
; and sign-extends the <8 x i1> compare result back to <8 x i16>.
; The per-prefix lines below (SSE2, SSE3, SSSE3, SSE41, AVX1, AVX2,
; AVX512VPOPCNTDQ, AVX512VPOPCNTDQVL, BITALG_NOVLX, BITALG) are FileCheck
; expectations for the RUN lines at the top of the file. The file header marks
; them as autogenerated by utils/update_llc_test_checks.py, so regenerate them
; with that script instead of editing them by hand.
4636 define <8 x i16> @ult_12_v8i16(<8 x i16> %0) {
4637 ; SSE2-LABEL: ult_12_v8i16:
4639 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4640 ; SSE2-NEXT: psrlw $1, %xmm1
4641 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4642 ; SSE2-NEXT: psubb %xmm1, %xmm0
4643 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4644 ; SSE2-NEXT: movdqa %xmm0, %xmm2
4645 ; SSE2-NEXT: pand %xmm1, %xmm2
4646 ; SSE2-NEXT: psrlw $2, %xmm0
4647 ; SSE2-NEXT: pand %xmm1, %xmm0
4648 ; SSE2-NEXT: paddb %xmm2, %xmm0
4649 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4650 ; SSE2-NEXT: psrlw $4, %xmm1
4651 ; SSE2-NEXT: paddb %xmm0, %xmm1
4652 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4653 ; SSE2-NEXT: movdqa %xmm1, %xmm2
4654 ; SSE2-NEXT: psllw $8, %xmm2
4655 ; SSE2-NEXT: paddb %xmm1, %xmm2
4656 ; SSE2-NEXT: psrlw $8, %xmm2
4657 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12]
4658 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
4661 ; SSE3-LABEL: ult_12_v8i16:
4663 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4664 ; SSE3-NEXT: psrlw $1, %xmm1
4665 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4666 ; SSE3-NEXT: psubb %xmm1, %xmm0
4667 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4668 ; SSE3-NEXT: movdqa %xmm0, %xmm2
4669 ; SSE3-NEXT: pand %xmm1, %xmm2
4670 ; SSE3-NEXT: psrlw $2, %xmm0
4671 ; SSE3-NEXT: pand %xmm1, %xmm0
4672 ; SSE3-NEXT: paddb %xmm2, %xmm0
4673 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4674 ; SSE3-NEXT: psrlw $4, %xmm1
4675 ; SSE3-NEXT: paddb %xmm0, %xmm1
4676 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4677 ; SSE3-NEXT: movdqa %xmm1, %xmm2
4678 ; SSE3-NEXT: psllw $8, %xmm2
4679 ; SSE3-NEXT: paddb %xmm1, %xmm2
4680 ; SSE3-NEXT: psrlw $8, %xmm2
4681 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12]
4682 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
4685 ; SSSE3-LABEL: ult_12_v8i16:
4687 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4688 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
4689 ; SSSE3-NEXT: pand %xmm1, %xmm2
4690 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4691 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
4692 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
4693 ; SSSE3-NEXT: psrlw $4, %xmm0
4694 ; SSSE3-NEXT: pand %xmm1, %xmm0
4695 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
4696 ; SSSE3-NEXT: paddb %xmm4, %xmm3
4697 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
4698 ; SSSE3-NEXT: psllw $8, %xmm1
4699 ; SSSE3-NEXT: paddb %xmm3, %xmm1
4700 ; SSSE3-NEXT: psrlw $8, %xmm1
4701 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12]
4702 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
4705 ; SSE41-LABEL: ult_12_v8i16:
4707 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4708 ; SSE41-NEXT: movdqa %xmm0, %xmm2
4709 ; SSE41-NEXT: pand %xmm1, %xmm2
4710 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4711 ; SSE41-NEXT: movdqa %xmm3, %xmm4
4712 ; SSE41-NEXT: pshufb %xmm2, %xmm4
4713 ; SSE41-NEXT: psrlw $4, %xmm0
4714 ; SSE41-NEXT: pand %xmm1, %xmm0
4715 ; SSE41-NEXT: pshufb %xmm0, %xmm3
4716 ; SSE41-NEXT: paddb %xmm4, %xmm3
4717 ; SSE41-NEXT: movdqa %xmm3, %xmm1
4718 ; SSE41-NEXT: psllw $8, %xmm1
4719 ; SSE41-NEXT: paddb %xmm3, %xmm1
4720 ; SSE41-NEXT: psrlw $8, %xmm1
4721 ; SSE41-NEXT: pmovsxbw {{.*#+}} xmm0 = [12,12,12,12,12,12,12,12]
4722 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
4725 ; AVX1-LABEL: ult_12_v8i16:
4727 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4728 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
4729 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4730 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4731 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
4732 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
4733 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4734 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4735 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
4736 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4737 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
4738 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
4739 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4742 ; AVX2-LABEL: ult_12_v8i16:
4744 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4745 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
4746 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4747 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4748 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
4749 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
4750 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4751 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4752 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
4753 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4754 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
4755 ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
4756 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4759 ; AVX512VPOPCNTDQ-LABEL: ult_12_v8i16:
4760 ; AVX512VPOPCNTDQ: # %bb.0:
4761 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4762 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
4763 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
4764 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastw {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
4765 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4766 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
4767 ; AVX512VPOPCNTDQ-NEXT: retq
4769 ; AVX512VPOPCNTDQVL-LABEL: ult_12_v8i16:
4770 ; AVX512VPOPCNTDQVL: # %bb.0:
4771 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4772 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
4773 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
4774 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
4775 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4776 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
4777 ; AVX512VPOPCNTDQVL-NEXT: retq
4779 ; BITALG_NOVLX-LABEL: ult_12_v8i16:
4780 ; BITALG_NOVLX: # %bb.0:
4781 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4782 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
4783 ; BITALG_NOVLX-NEXT: vpbroadcastw {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
4784 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
4785 ; BITALG_NOVLX-NEXT: vzeroupper
4786 ; BITALG_NOVLX-NEXT: retq
4788 ; BITALG-LABEL: ult_12_v8i16:
4790 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
4791 ; BITALG-NEXT: vpbroadcastw {{.*#+}} xmm1 = [12,12,12,12,12,12,12,12]
4792 ; BITALG-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; IR under test: ctpop per lane, then "count < 12" (unsigned); the sext turns each
; true lane into all-ones and each false lane into zero.
4794 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
4795 %3 = icmp ult <8 x i16> %2, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
4796 %4 = sext <8 x i1> %3 to <8 x i16>
; ugt_12_v8i16: takes the per-lane population count of the <8 x i16> argument
; via @llvm.ctpop.v8i16, tests each count with an unsigned greater-than against
; 12, and sign-extends the <8 x i1> compare result back to <8 x i16>.
; The per-prefix lines below (SSE2, SSE3, SSSE3, SSE41, AVX1, AVX2,
; AVX512VPOPCNTDQ, AVX512VPOPCNTDQVL, BITALG_NOVLX, BITALG) are FileCheck
; expectations for the RUN lines at the top of the file. The file header marks
; them as autogenerated by utils/update_llc_test_checks.py, so regenerate them
; with that script instead of editing them by hand.
4800 define <8 x i16> @ugt_12_v8i16(<8 x i16> %0) {
4801 ; SSE2-LABEL: ugt_12_v8i16:
4803 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4804 ; SSE2-NEXT: psrlw $1, %xmm1
4805 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4806 ; SSE2-NEXT: psubb %xmm1, %xmm0
4807 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4808 ; SSE2-NEXT: movdqa %xmm0, %xmm2
4809 ; SSE2-NEXT: pand %xmm1, %xmm2
4810 ; SSE2-NEXT: psrlw $2, %xmm0
4811 ; SSE2-NEXT: pand %xmm1, %xmm0
4812 ; SSE2-NEXT: paddb %xmm2, %xmm0
4813 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4814 ; SSE2-NEXT: psrlw $4, %xmm1
4815 ; SSE2-NEXT: paddb %xmm1, %xmm0
4816 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4817 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4818 ; SSE2-NEXT: psllw $8, %xmm1
4819 ; SSE2-NEXT: paddb %xmm1, %xmm0
4820 ; SSE2-NEXT: psrlw $8, %xmm0
4821 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4824 ; SSE3-LABEL: ugt_12_v8i16:
4826 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4827 ; SSE3-NEXT: psrlw $1, %xmm1
4828 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4829 ; SSE3-NEXT: psubb %xmm1, %xmm0
4830 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4831 ; SSE3-NEXT: movdqa %xmm0, %xmm2
4832 ; SSE3-NEXT: pand %xmm1, %xmm2
4833 ; SSE3-NEXT: psrlw $2, %xmm0
4834 ; SSE3-NEXT: pand %xmm1, %xmm0
4835 ; SSE3-NEXT: paddb %xmm2, %xmm0
4836 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4837 ; SSE3-NEXT: psrlw $4, %xmm1
4838 ; SSE3-NEXT: paddb %xmm1, %xmm0
4839 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4840 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4841 ; SSE3-NEXT: psllw $8, %xmm1
4842 ; SSE3-NEXT: paddb %xmm1, %xmm0
4843 ; SSE3-NEXT: psrlw $8, %xmm0
4844 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4847 ; SSSE3-LABEL: ugt_12_v8i16:
4849 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4850 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
4851 ; SSSE3-NEXT: pand %xmm1, %xmm2
4852 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4853 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
4854 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
4855 ; SSSE3-NEXT: psrlw $4, %xmm0
4856 ; SSSE3-NEXT: pand %xmm1, %xmm0
4857 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
4858 ; SSSE3-NEXT: paddb %xmm4, %xmm3
4859 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
4860 ; SSSE3-NEXT: psllw $8, %xmm0
4861 ; SSSE3-NEXT: paddb %xmm3, %xmm0
4862 ; SSSE3-NEXT: psrlw $8, %xmm0
4863 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4866 ; SSE41-LABEL: ugt_12_v8i16:
4868 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4869 ; SSE41-NEXT: movdqa %xmm0, %xmm2
4870 ; SSE41-NEXT: pand %xmm1, %xmm2
4871 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4872 ; SSE41-NEXT: movdqa %xmm3, %xmm4
4873 ; SSE41-NEXT: pshufb %xmm2, %xmm4
4874 ; SSE41-NEXT: psrlw $4, %xmm0
4875 ; SSE41-NEXT: pand %xmm1, %xmm0
4876 ; SSE41-NEXT: pshufb %xmm0, %xmm3
4877 ; SSE41-NEXT: paddb %xmm4, %xmm3
4878 ; SSE41-NEXT: movdqa %xmm3, %xmm0
4879 ; SSE41-NEXT: psllw $8, %xmm0
4880 ; SSE41-NEXT: paddb %xmm3, %xmm0
4881 ; SSE41-NEXT: psrlw $8, %xmm0
4882 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
4885 ; AVX1-LABEL: ugt_12_v8i16:
4887 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4888 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
4889 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4890 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4891 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
4892 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
4893 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4894 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4895 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
4896 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4897 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
4898 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4901 ; AVX2-LABEL: ugt_12_v8i16:
4903 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4904 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
4905 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
4906 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
4907 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
4908 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
4909 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
4910 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
4911 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
4912 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
4913 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
4914 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4917 ; AVX512VPOPCNTDQ-LABEL: ugt_12_v8i16:
4918 ; AVX512VPOPCNTDQ: # %bb.0:
4919 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4920 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
4921 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
4922 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4923 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
4924 ; AVX512VPOPCNTDQ-NEXT: retq
4926 ; AVX512VPOPCNTDQVL-LABEL: ugt_12_v8i16:
4927 ; AVX512VPOPCNTDQVL: # %bb.0:
4928 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4929 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
4930 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
4931 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4932 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
4933 ; AVX512VPOPCNTDQVL-NEXT: retq
4935 ; BITALG_NOVLX-LABEL: ugt_12_v8i16:
4936 ; BITALG_NOVLX: # %bb.0:
4937 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
4938 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
4939 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
4940 ; BITALG_NOVLX-NEXT: vzeroupper
4941 ; BITALG_NOVLX-NEXT: retq
4943 ; BITALG-LABEL: ugt_12_v8i16:
4945 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
4946 ; BITALG-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; IR under test: ctpop per lane, then "count > 12" (unsigned); the sext turns each
; true lane into all-ones and each false lane into zero.
4948 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
4949 %3 = icmp ugt <8 x i16> %2, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
4950 %4 = sext <8 x i1> %3 to <8 x i16>
; ult_13_v8i16: takes the per-lane population count of the <8 x i16> argument
; via @llvm.ctpop.v8i16, tests each count with an unsigned less-than against 13,
; and sign-extends the <8 x i1> compare result back to <8 x i16>.
; The per-prefix lines below (SSE2, SSE3, SSSE3, SSE41, AVX1, AVX2,
; AVX512VPOPCNTDQ, AVX512VPOPCNTDQVL, BITALG_NOVLX, BITALG) are FileCheck
; expectations for the RUN lines at the top of the file. The file header marks
; them as autogenerated by utils/update_llc_test_checks.py, so regenerate them
; with that script instead of editing them by hand.
4954 define <8 x i16> @ult_13_v8i16(<8 x i16> %0) {
4955 ; SSE2-LABEL: ult_13_v8i16:
4957 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4958 ; SSE2-NEXT: psrlw $1, %xmm1
4959 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4960 ; SSE2-NEXT: psubb %xmm1, %xmm0
4961 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4962 ; SSE2-NEXT: movdqa %xmm0, %xmm2
4963 ; SSE2-NEXT: pand %xmm1, %xmm2
4964 ; SSE2-NEXT: psrlw $2, %xmm0
4965 ; SSE2-NEXT: pand %xmm1, %xmm0
4966 ; SSE2-NEXT: paddb %xmm2, %xmm0
4967 ; SSE2-NEXT: movdqa %xmm0, %xmm1
4968 ; SSE2-NEXT: psrlw $4, %xmm1
4969 ; SSE2-NEXT: paddb %xmm0, %xmm1
4970 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4971 ; SSE2-NEXT: movdqa %xmm1, %xmm2
4972 ; SSE2-NEXT: psllw $8, %xmm2
4973 ; SSE2-NEXT: paddb %xmm1, %xmm2
4974 ; SSE2-NEXT: psrlw $8, %xmm2
4975 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13]
4976 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
4979 ; SSE3-LABEL: ult_13_v8i16:
4981 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4982 ; SSE3-NEXT: psrlw $1, %xmm1
4983 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4984 ; SSE3-NEXT: psubb %xmm1, %xmm0
4985 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
4986 ; SSE3-NEXT: movdqa %xmm0, %xmm2
4987 ; SSE3-NEXT: pand %xmm1, %xmm2
4988 ; SSE3-NEXT: psrlw $2, %xmm0
4989 ; SSE3-NEXT: pand %xmm1, %xmm0
4990 ; SSE3-NEXT: paddb %xmm2, %xmm0
4991 ; SSE3-NEXT: movdqa %xmm0, %xmm1
4992 ; SSE3-NEXT: psrlw $4, %xmm1
4993 ; SSE3-NEXT: paddb %xmm0, %xmm1
4994 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
4995 ; SSE3-NEXT: movdqa %xmm1, %xmm2
4996 ; SSE3-NEXT: psllw $8, %xmm2
4997 ; SSE3-NEXT: paddb %xmm1, %xmm2
4998 ; SSE3-NEXT: psrlw $8, %xmm2
4999 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13]
5000 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
5003 ; SSSE3-LABEL: ult_13_v8i16:
5005 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5006 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
5007 ; SSSE3-NEXT: pand %xmm1, %xmm2
5008 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5009 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
5010 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
5011 ; SSSE3-NEXT: psrlw $4, %xmm0
5012 ; SSSE3-NEXT: pand %xmm1, %xmm0
5013 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
5014 ; SSSE3-NEXT: paddb %xmm4, %xmm3
5015 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
5016 ; SSSE3-NEXT: psllw $8, %xmm1
5017 ; SSSE3-NEXT: paddb %xmm3, %xmm1
5018 ; SSSE3-NEXT: psrlw $8, %xmm1
5019 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13]
5020 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
5023 ; SSE41-LABEL: ult_13_v8i16:
5025 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5026 ; SSE41-NEXT: movdqa %xmm0, %xmm2
5027 ; SSE41-NEXT: pand %xmm1, %xmm2
5028 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5029 ; SSE41-NEXT: movdqa %xmm3, %xmm4
5030 ; SSE41-NEXT: pshufb %xmm2, %xmm4
5031 ; SSE41-NEXT: psrlw $4, %xmm0
5032 ; SSE41-NEXT: pand %xmm1, %xmm0
5033 ; SSE41-NEXT: pshufb %xmm0, %xmm3
5034 ; SSE41-NEXT: paddb %xmm4, %xmm3
5035 ; SSE41-NEXT: movdqa %xmm3, %xmm1
5036 ; SSE41-NEXT: psllw $8, %xmm1
5037 ; SSE41-NEXT: paddb %xmm3, %xmm1
5038 ; SSE41-NEXT: psrlw $8, %xmm1
5039 ; SSE41-NEXT: pmovsxbw {{.*#+}} xmm0 = [13,13,13,13,13,13,13,13]
5040 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
5043 ; AVX1-LABEL: ult_13_v8i16:
5045 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5046 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
5047 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5048 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5049 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
5050 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
5051 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5052 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5053 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
5054 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5055 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
5056 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
5057 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5060 ; AVX2-LABEL: ult_13_v8i16:
5062 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5063 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
5064 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5065 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5066 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
5067 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
5068 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5069 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5070 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
5071 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5072 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
5073 ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
5074 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5077 ; AVX512VPOPCNTDQ-LABEL: ult_13_v8i16:
5078 ; AVX512VPOPCNTDQ: # %bb.0:
5079 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5080 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
5081 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
5082 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastw {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
5083 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5084 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
5085 ; AVX512VPOPCNTDQ-NEXT: retq
5087 ; AVX512VPOPCNTDQVL-LABEL: ult_13_v8i16:
5088 ; AVX512VPOPCNTDQVL: # %bb.0:
5089 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5090 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
5091 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
5092 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
5093 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5094 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
5095 ; AVX512VPOPCNTDQVL-NEXT: retq
5097 ; BITALG_NOVLX-LABEL: ult_13_v8i16:
5098 ; BITALG_NOVLX: # %bb.0:
5099 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5100 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
5101 ; BITALG_NOVLX-NEXT: vpbroadcastw {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
5102 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5103 ; BITALG_NOVLX-NEXT: vzeroupper
5104 ; BITALG_NOVLX-NEXT: retq
5106 ; BITALG-LABEL: ult_13_v8i16:
5108 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
5109 ; BITALG-NEXT: vpbroadcastw {{.*#+}} xmm1 = [13,13,13,13,13,13,13,13]
5110 ; BITALG-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; IR under test: ctpop per lane, then "count < 13" (unsigned); the sext turns each
; true lane into all-ones and each false lane into zero.
5112 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
5113 %3 = icmp ult <8 x i16> %2, <i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13>
5114 %4 = sext <8 x i1> %3 to <8 x i16>
; ugt_13_v8i16: takes the per-lane population count of the <8 x i16> argument
; via @llvm.ctpop.v8i16, tests each count with an unsigned greater-than against
; 13, and sign-extends the <8 x i1> compare result back to <8 x i16>.
; The per-prefix lines below (SSE2, SSE3, SSSE3, SSE41, AVX1, AVX2,
; AVX512VPOPCNTDQ, AVX512VPOPCNTDQVL, BITALG_NOVLX, BITALG) are FileCheck
; expectations for the RUN lines at the top of the file. The file header marks
; them as autogenerated by utils/update_llc_test_checks.py, so regenerate them
; with that script instead of editing them by hand.
5118 define <8 x i16> @ugt_13_v8i16(<8 x i16> %0) {
5119 ; SSE2-LABEL: ugt_13_v8i16:
5121 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5122 ; SSE2-NEXT: psrlw $1, %xmm1
5123 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5124 ; SSE2-NEXT: psubb %xmm1, %xmm0
5125 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5126 ; SSE2-NEXT: movdqa %xmm0, %xmm2
5127 ; SSE2-NEXT: pand %xmm1, %xmm2
5128 ; SSE2-NEXT: psrlw $2, %xmm0
5129 ; SSE2-NEXT: pand %xmm1, %xmm0
5130 ; SSE2-NEXT: paddb %xmm2, %xmm0
5131 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5132 ; SSE2-NEXT: psrlw $4, %xmm1
5133 ; SSE2-NEXT: paddb %xmm1, %xmm0
5134 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5135 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5136 ; SSE2-NEXT: psllw $8, %xmm1
5137 ; SSE2-NEXT: paddb %xmm1, %xmm0
5138 ; SSE2-NEXT: psrlw $8, %xmm0
5139 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5142 ; SSE3-LABEL: ugt_13_v8i16:
5144 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5145 ; SSE3-NEXT: psrlw $1, %xmm1
5146 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5147 ; SSE3-NEXT: psubb %xmm1, %xmm0
5148 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5149 ; SSE3-NEXT: movdqa %xmm0, %xmm2
5150 ; SSE3-NEXT: pand %xmm1, %xmm2
5151 ; SSE3-NEXT: psrlw $2, %xmm0
5152 ; SSE3-NEXT: pand %xmm1, %xmm0
5153 ; SSE3-NEXT: paddb %xmm2, %xmm0
5154 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5155 ; SSE3-NEXT: psrlw $4, %xmm1
5156 ; SSE3-NEXT: paddb %xmm1, %xmm0
5157 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5158 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5159 ; SSE3-NEXT: psllw $8, %xmm1
5160 ; SSE3-NEXT: paddb %xmm1, %xmm0
5161 ; SSE3-NEXT: psrlw $8, %xmm0
5162 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5165 ; SSSE3-LABEL: ugt_13_v8i16:
5167 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5168 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
5169 ; SSSE3-NEXT: pand %xmm1, %xmm2
5170 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5171 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
5172 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
5173 ; SSSE3-NEXT: psrlw $4, %xmm0
5174 ; SSSE3-NEXT: pand %xmm1, %xmm0
5175 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
5176 ; SSSE3-NEXT: paddb %xmm4, %xmm3
5177 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
5178 ; SSSE3-NEXT: psllw $8, %xmm0
5179 ; SSSE3-NEXT: paddb %xmm3, %xmm0
5180 ; SSSE3-NEXT: psrlw $8, %xmm0
5181 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5184 ; SSE41-LABEL: ugt_13_v8i16:
5186 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5187 ; SSE41-NEXT: movdqa %xmm0, %xmm2
5188 ; SSE41-NEXT: pand %xmm1, %xmm2
5189 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5190 ; SSE41-NEXT: movdqa %xmm3, %xmm4
5191 ; SSE41-NEXT: pshufb %xmm2, %xmm4
5192 ; SSE41-NEXT: psrlw $4, %xmm0
5193 ; SSE41-NEXT: pand %xmm1, %xmm0
5194 ; SSE41-NEXT: pshufb %xmm0, %xmm3
5195 ; SSE41-NEXT: paddb %xmm4, %xmm3
5196 ; SSE41-NEXT: movdqa %xmm3, %xmm0
5197 ; SSE41-NEXT: psllw $8, %xmm0
5198 ; SSE41-NEXT: paddb %xmm3, %xmm0
5199 ; SSE41-NEXT: psrlw $8, %xmm0
5200 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5203 ; AVX1-LABEL: ugt_13_v8i16:
5205 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5206 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
5207 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5208 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5209 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
5210 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
5211 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5212 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5213 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
5214 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5215 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
5216 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5219 ; AVX2-LABEL: ugt_13_v8i16:
5221 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5222 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
5223 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5224 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5225 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
5226 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
5227 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5228 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5229 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
5230 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5231 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
5232 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5235 ; AVX512VPOPCNTDQ-LABEL: ugt_13_v8i16:
5236 ; AVX512VPOPCNTDQ: # %bb.0:
5237 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5238 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
5239 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
5240 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5241 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
5242 ; AVX512VPOPCNTDQ-NEXT: retq
5244 ; AVX512VPOPCNTDQVL-LABEL: ugt_13_v8i16:
5245 ; AVX512VPOPCNTDQVL: # %bb.0:
5246 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5247 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
5248 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
5249 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5250 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
5251 ; AVX512VPOPCNTDQVL-NEXT: retq
5253 ; BITALG_NOVLX-LABEL: ugt_13_v8i16:
5254 ; BITALG_NOVLX: # %bb.0:
5255 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5256 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
5257 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5258 ; BITALG_NOVLX-NEXT: vzeroupper
5259 ; BITALG_NOVLX-NEXT: retq
5261 ; BITALG-LABEL: ugt_13_v8i16:
5263 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
5264 ; BITALG-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; IR under test: ctpop per lane, then "count > 13" (unsigned); the sext turns each
; true lane into all-ones and each false lane into zero.
5266 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
5267 %3 = icmp ugt <8 x i16> %2, <i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13, i16 13>
5268 %4 = sext <8 x i1> %3 to <8 x i16>
; ult_14_v8i16: takes the per-lane population count of the <8 x i16> argument
; via @llvm.ctpop.v8i16, tests each count with an unsigned less-than against 14,
; and sign-extends the <8 x i1> compare result back to <8 x i16>.
; The per-prefix lines below (SSE2, SSE3, SSSE3, SSE41, AVX1, AVX2,
; AVX512VPOPCNTDQ, AVX512VPOPCNTDQVL, BITALG_NOVLX, BITALG) are FileCheck
; expectations for the RUN lines at the top of the file. The file header marks
; them as autogenerated by utils/update_llc_test_checks.py, so regenerate them
; with that script instead of editing them by hand.
5272 define <8 x i16> @ult_14_v8i16(<8 x i16> %0) {
5273 ; SSE2-LABEL: ult_14_v8i16:
5275 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5276 ; SSE2-NEXT: psrlw $1, %xmm1
5277 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5278 ; SSE2-NEXT: psubb %xmm1, %xmm0
5279 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5280 ; SSE2-NEXT: movdqa %xmm0, %xmm2
5281 ; SSE2-NEXT: pand %xmm1, %xmm2
5282 ; SSE2-NEXT: psrlw $2, %xmm0
5283 ; SSE2-NEXT: pand %xmm1, %xmm0
5284 ; SSE2-NEXT: paddb %xmm2, %xmm0
5285 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5286 ; SSE2-NEXT: psrlw $4, %xmm1
5287 ; SSE2-NEXT: paddb %xmm0, %xmm1
5288 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5289 ; SSE2-NEXT: movdqa %xmm1, %xmm2
5290 ; SSE2-NEXT: psllw $8, %xmm2
5291 ; SSE2-NEXT: paddb %xmm1, %xmm2
5292 ; SSE2-NEXT: psrlw $8, %xmm2
5293 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14]
5294 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
5297 ; SSE3-LABEL: ult_14_v8i16:
5299 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5300 ; SSE3-NEXT: psrlw $1, %xmm1
5301 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5302 ; SSE3-NEXT: psubb %xmm1, %xmm0
5303 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5304 ; SSE3-NEXT: movdqa %xmm0, %xmm2
5305 ; SSE3-NEXT: pand %xmm1, %xmm2
5306 ; SSE3-NEXT: psrlw $2, %xmm0
5307 ; SSE3-NEXT: pand %xmm1, %xmm0
5308 ; SSE3-NEXT: paddb %xmm2, %xmm0
5309 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5310 ; SSE3-NEXT: psrlw $4, %xmm1
5311 ; SSE3-NEXT: paddb %xmm0, %xmm1
5312 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5313 ; SSE3-NEXT: movdqa %xmm1, %xmm2
5314 ; SSE3-NEXT: psllw $8, %xmm2
5315 ; SSE3-NEXT: paddb %xmm1, %xmm2
5316 ; SSE3-NEXT: psrlw $8, %xmm2
5317 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14]
5318 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
5321 ; SSSE3-LABEL: ult_14_v8i16:
5323 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5324 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
5325 ; SSSE3-NEXT: pand %xmm1, %xmm2
5326 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5327 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
5328 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
5329 ; SSSE3-NEXT: psrlw $4, %xmm0
5330 ; SSSE3-NEXT: pand %xmm1, %xmm0
5331 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
5332 ; SSSE3-NEXT: paddb %xmm4, %xmm3
5333 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
5334 ; SSSE3-NEXT: psllw $8, %xmm1
5335 ; SSSE3-NEXT: paddb %xmm3, %xmm1
5336 ; SSSE3-NEXT: psrlw $8, %xmm1
5337 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14]
5338 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
5341 ; SSE41-LABEL: ult_14_v8i16:
5343 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5344 ; SSE41-NEXT: movdqa %xmm0, %xmm2
5345 ; SSE41-NEXT: pand %xmm1, %xmm2
5346 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5347 ; SSE41-NEXT: movdqa %xmm3, %xmm4
5348 ; SSE41-NEXT: pshufb %xmm2, %xmm4
5349 ; SSE41-NEXT: psrlw $4, %xmm0
5350 ; SSE41-NEXT: pand %xmm1, %xmm0
5351 ; SSE41-NEXT: pshufb %xmm0, %xmm3
5352 ; SSE41-NEXT: paddb %xmm4, %xmm3
5353 ; SSE41-NEXT: movdqa %xmm3, %xmm1
5354 ; SSE41-NEXT: psllw $8, %xmm1
5355 ; SSE41-NEXT: paddb %xmm3, %xmm1
5356 ; SSE41-NEXT: psrlw $8, %xmm1
5357 ; SSE41-NEXT: pmovsxbw {{.*#+}} xmm0 = [14,14,14,14,14,14,14,14]
5358 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
5361 ; AVX1-LABEL: ult_14_v8i16:
5363 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5364 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
5365 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5366 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5367 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
5368 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
5369 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5370 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5371 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
5372 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5373 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
5374 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
5375 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5378 ; AVX2-LABEL: ult_14_v8i16:
5380 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5381 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
5382 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5383 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5384 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
5385 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
5386 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5387 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5388 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
5389 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5390 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
5391 ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
5392 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5395 ; AVX512VPOPCNTDQ-LABEL: ult_14_v8i16:
5396 ; AVX512VPOPCNTDQ: # %bb.0:
5397 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5398 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
5399 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
5400 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastw {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
5401 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5402 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
5403 ; AVX512VPOPCNTDQ-NEXT: retq
5405 ; AVX512VPOPCNTDQVL-LABEL: ult_14_v8i16:
5406 ; AVX512VPOPCNTDQVL: # %bb.0:
5407 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5408 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
5409 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
5410 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
5411 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5412 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
5413 ; AVX512VPOPCNTDQVL-NEXT: retq
5415 ; BITALG_NOVLX-LABEL: ult_14_v8i16:
5416 ; BITALG_NOVLX: # %bb.0:
5417 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5418 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
5419 ; BITALG_NOVLX-NEXT: vpbroadcastw {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
5420 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5421 ; BITALG_NOVLX-NEXT: vzeroupper
5422 ; BITALG_NOVLX-NEXT: retq
5424 ; BITALG-LABEL: ult_14_v8i16:
5426 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
5427 ; BITALG-NEXT: vpbroadcastw {{.*#+}} xmm1 = [14,14,14,14,14,14,14,14]
5428 ; BITALG-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; IR under test: ctpop per lane, then "count < 14" (unsigned); the sext turns each
; true lane into all-ones and each false lane into zero.
5430 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
5431 %3 = icmp ult <8 x i16> %2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14>
5432 %4 = sext <8 x i1> %3 to <8 x i16>
; ugt_14_v8i16 - produces a per-lane mask of the i16 lanes of %0 whose
; population count is greater than 14 (unsigned icmp on the ctpop result, with
; the i1 result sign-extended back to i16).
; The assertions below are autogenerated (see the file header); regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; Lowering recorded by the assertions:
;   - the SSE2 and SSE3 configurations count bits with shift/mask/add steps;
;   - the SSSE3, SSE41, AVX1 and AVX2 configurations use a pshufb lookup on each
;     nibble with the table [0,1,1,2,...,4];
;   - the AVX512VPOPCNTDQ configurations zero-extend to i32 lanes, use vpopcntd
;     and truncate back with vpmovdw;
;   - the BITALG configurations use vpopcntw directly;
;   - every configuration finishes with pcmpgtw/vpcmpgtw against a constant-pool
;     operand (a signed compare; presumably safe because a 16-bit popcount is
;     at most 16 - TODO confirm).
5436 define <8 x i16> @ugt_14_v8i16(<8 x i16> %0) {
5437 ; SSE2-LABEL: ugt_14_v8i16:
5439 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5440 ; SSE2-NEXT: psrlw $1, %xmm1
5441 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5442 ; SSE2-NEXT: psubb %xmm1, %xmm0
5443 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5444 ; SSE2-NEXT: movdqa %xmm0, %xmm2
5445 ; SSE2-NEXT: pand %xmm1, %xmm2
5446 ; SSE2-NEXT: psrlw $2, %xmm0
5447 ; SSE2-NEXT: pand %xmm1, %xmm0
5448 ; SSE2-NEXT: paddb %xmm2, %xmm0
5449 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5450 ; SSE2-NEXT: psrlw $4, %xmm1
5451 ; SSE2-NEXT: paddb %xmm1, %xmm0
5452 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5453 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5454 ; SSE2-NEXT: psllw $8, %xmm1
5455 ; SSE2-NEXT: paddb %xmm1, %xmm0
5456 ; SSE2-NEXT: psrlw $8, %xmm0
5457 ; SSE2-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5460 ; SSE3-LABEL: ugt_14_v8i16:
5462 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5463 ; SSE3-NEXT: psrlw $1, %xmm1
5464 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5465 ; SSE3-NEXT: psubb %xmm1, %xmm0
5466 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5467 ; SSE3-NEXT: movdqa %xmm0, %xmm2
5468 ; SSE3-NEXT: pand %xmm1, %xmm2
5469 ; SSE3-NEXT: psrlw $2, %xmm0
5470 ; SSE3-NEXT: pand %xmm1, %xmm0
5471 ; SSE3-NEXT: paddb %xmm2, %xmm0
5472 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5473 ; SSE3-NEXT: psrlw $4, %xmm1
5474 ; SSE3-NEXT: paddb %xmm1, %xmm0
5475 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5476 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5477 ; SSE3-NEXT: psllw $8, %xmm1
5478 ; SSE3-NEXT: paddb %xmm1, %xmm0
5479 ; SSE3-NEXT: psrlw $8, %xmm0
5480 ; SSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5483 ; SSSE3-LABEL: ugt_14_v8i16:
5485 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5486 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
5487 ; SSSE3-NEXT: pand %xmm1, %xmm2
5488 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5489 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
5490 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
5491 ; SSSE3-NEXT: psrlw $4, %xmm0
5492 ; SSSE3-NEXT: pand %xmm1, %xmm0
5493 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
5494 ; SSSE3-NEXT: paddb %xmm4, %xmm3
5495 ; SSSE3-NEXT: movdqa %xmm3, %xmm0
5496 ; SSSE3-NEXT: psllw $8, %xmm0
5497 ; SSSE3-NEXT: paddb %xmm3, %xmm0
5498 ; SSSE3-NEXT: psrlw $8, %xmm0
5499 ; SSSE3-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5502 ; SSE41-LABEL: ugt_14_v8i16:
5504 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5505 ; SSE41-NEXT: movdqa %xmm0, %xmm2
5506 ; SSE41-NEXT: pand %xmm1, %xmm2
5507 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5508 ; SSE41-NEXT: movdqa %xmm3, %xmm4
5509 ; SSE41-NEXT: pshufb %xmm2, %xmm4
5510 ; SSE41-NEXT: psrlw $4, %xmm0
5511 ; SSE41-NEXT: pand %xmm1, %xmm0
5512 ; SSE41-NEXT: pshufb %xmm0, %xmm3
5513 ; SSE41-NEXT: paddb %xmm4, %xmm3
5514 ; SSE41-NEXT: movdqa %xmm3, %xmm0
5515 ; SSE41-NEXT: psllw $8, %xmm0
5516 ; SSE41-NEXT: paddb %xmm3, %xmm0
5517 ; SSE41-NEXT: psrlw $8, %xmm0
5518 ; SSE41-NEXT: pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5521 ; AVX1-LABEL: ugt_14_v8i16:
5523 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5524 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
5525 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5526 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5527 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
5528 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
5529 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5530 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5531 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
5532 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5533 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
5534 ; AVX1-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5537 ; AVX2-LABEL: ugt_14_v8i16:
5539 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5540 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
5541 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5542 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5543 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
5544 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
5545 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5546 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5547 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
5548 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5549 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
5550 ; AVX2-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5553 ; AVX512VPOPCNTDQ-LABEL: ugt_14_v8i16:
5554 ; AVX512VPOPCNTDQ: # %bb.0:
5555 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5556 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
5557 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
5558 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5559 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
5560 ; AVX512VPOPCNTDQ-NEXT: retq
5562 ; AVX512VPOPCNTDQVL-LABEL: ugt_14_v8i16:
5563 ; AVX512VPOPCNTDQVL: # %bb.0:
5564 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5565 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
5566 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
5567 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5568 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
5569 ; AVX512VPOPCNTDQVL-NEXT: retq
5571 ; BITALG_NOVLX-LABEL: ugt_14_v8i16:
5572 ; BITALG_NOVLX: # %bb.0:
5573 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5574 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
5575 ; BITALG_NOVLX-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5576 ; BITALG_NOVLX-NEXT: vzeroupper
5577 ; BITALG_NOVLX-NEXT: retq
5579 ; BITALG-LABEL: ugt_14_v8i16:
5581 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
5582 ; BITALG-NEXT: vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; IR under test - ctpop, unsigned compare against a splat of 14, sext to a lane mask.
5584 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
5585 %3 = icmp ugt <8 x i16> %2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14>
5586 %4 = sext <8 x i1> %3 to <8 x i16>
; ult_15_v8i16 - produces a per-lane mask of the i16 lanes of %0 whose
; population count is less than 15 (unsigned icmp on the ctpop result, with the
; i1 result sign-extended back to i16).
; The assertions below are autogenerated (see the file header); regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; Lowering recorded by the assertions:
;   - the SSE2 and SSE3 configurations count bits with shift/mask/add steps;
;   - the SSSE3, SSE41, AVX1 and AVX2 configurations use a pshufb lookup on each
;     nibble with the table [0,1,1,2,...,4];
;   - the AVX512VPOPCNTDQ configurations zero-extend to i32 lanes, use vpopcntd
;     and truncate back with vpmovdw;
;   - the BITALG configurations use vpopcntw directly;
;   - every configuration materializes a splat of 15 in a register and finishes
;     with pcmpgtw/vpcmpgtw computing "15 > popcount", i.e. the less-than is
;     expressed as a greater-than with the operands swapped.
5590 define <8 x i16> @ult_15_v8i16(<8 x i16> %0) {
5591 ; SSE2-LABEL: ult_15_v8i16:
5593 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5594 ; SSE2-NEXT: psrlw $1, %xmm1
5595 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5596 ; SSE2-NEXT: psubb %xmm1, %xmm0
5597 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5598 ; SSE2-NEXT: movdqa %xmm0, %xmm2
5599 ; SSE2-NEXT: pand %xmm1, %xmm2
5600 ; SSE2-NEXT: psrlw $2, %xmm0
5601 ; SSE2-NEXT: pand %xmm1, %xmm0
5602 ; SSE2-NEXT: paddb %xmm2, %xmm0
5603 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5604 ; SSE2-NEXT: psrlw $4, %xmm1
5605 ; SSE2-NEXT: paddb %xmm0, %xmm1
5606 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5607 ; SSE2-NEXT: movdqa %xmm1, %xmm2
5608 ; SSE2-NEXT: psllw $8, %xmm2
5609 ; SSE2-NEXT: paddb %xmm1, %xmm2
5610 ; SSE2-NEXT: psrlw $8, %xmm2
5611 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15]
5612 ; SSE2-NEXT: pcmpgtw %xmm2, %xmm0
5615 ; SSE3-LABEL: ult_15_v8i16:
5617 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5618 ; SSE3-NEXT: psrlw $1, %xmm1
5619 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5620 ; SSE3-NEXT: psubb %xmm1, %xmm0
5621 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5622 ; SSE3-NEXT: movdqa %xmm0, %xmm2
5623 ; SSE3-NEXT: pand %xmm1, %xmm2
5624 ; SSE3-NEXT: psrlw $2, %xmm0
5625 ; SSE3-NEXT: pand %xmm1, %xmm0
5626 ; SSE3-NEXT: paddb %xmm2, %xmm0
5627 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5628 ; SSE3-NEXT: psrlw $4, %xmm1
5629 ; SSE3-NEXT: paddb %xmm0, %xmm1
5630 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5631 ; SSE3-NEXT: movdqa %xmm1, %xmm2
5632 ; SSE3-NEXT: psllw $8, %xmm2
5633 ; SSE3-NEXT: paddb %xmm1, %xmm2
5634 ; SSE3-NEXT: psrlw $8, %xmm2
5635 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15]
5636 ; SSE3-NEXT: pcmpgtw %xmm2, %xmm0
5639 ; SSSE3-LABEL: ult_15_v8i16:
5641 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5642 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
5643 ; SSSE3-NEXT: pand %xmm1, %xmm2
5644 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5645 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
5646 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
5647 ; SSSE3-NEXT: psrlw $4, %xmm0
5648 ; SSSE3-NEXT: pand %xmm1, %xmm0
5649 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
5650 ; SSSE3-NEXT: paddb %xmm4, %xmm3
5651 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
5652 ; SSSE3-NEXT: psllw $8, %xmm1
5653 ; SSSE3-NEXT: paddb %xmm3, %xmm1
5654 ; SSSE3-NEXT: psrlw $8, %xmm1
5655 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15]
5656 ; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
5659 ; SSE41-LABEL: ult_15_v8i16:
5661 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5662 ; SSE41-NEXT: movdqa %xmm0, %xmm2
5663 ; SSE41-NEXT: pand %xmm1, %xmm2
5664 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5665 ; SSE41-NEXT: movdqa %xmm3, %xmm4
5666 ; SSE41-NEXT: pshufb %xmm2, %xmm4
5667 ; SSE41-NEXT: psrlw $4, %xmm0
5668 ; SSE41-NEXT: pand %xmm1, %xmm0
5669 ; SSE41-NEXT: pshufb %xmm0, %xmm3
5670 ; SSE41-NEXT: paddb %xmm4, %xmm3
5671 ; SSE41-NEXT: movdqa %xmm3, %xmm1
5672 ; SSE41-NEXT: psllw $8, %xmm1
5673 ; SSE41-NEXT: paddb %xmm3, %xmm1
5674 ; SSE41-NEXT: psrlw $8, %xmm1
5675 ; SSE41-NEXT: pmovsxbw {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15]
5676 ; SSE41-NEXT: pcmpgtw %xmm1, %xmm0
5679 ; AVX1-LABEL: ult_15_v8i16:
5681 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5682 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
5683 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5684 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5685 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
5686 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
5687 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5688 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5689 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
5690 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5691 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
5692 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5693 ; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5696 ; AVX2-LABEL: ult_15_v8i16:
5698 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5699 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
5700 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5701 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5702 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
5703 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
5704 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
5705 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
5706 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm1
5707 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0
5708 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
5709 ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5710 ; AVX2-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5713 ; AVX512VPOPCNTDQ-LABEL: ult_15_v8i16:
5714 ; AVX512VPOPCNTDQ: # %bb.0:
5715 ; AVX512VPOPCNTDQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5716 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
5717 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
5718 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastw {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5719 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5720 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
5721 ; AVX512VPOPCNTDQ-NEXT: retq
5723 ; AVX512VPOPCNTDQVL-LABEL: ult_15_v8i16:
5724 ; AVX512VPOPCNTDQVL: # %bb.0:
5725 ; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5726 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
5727 ; AVX512VPOPCNTDQVL-NEXT: vpmovdw %ymm0, %xmm0
5728 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5729 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5730 ; AVX512VPOPCNTDQVL-NEXT: vzeroupper
5731 ; AVX512VPOPCNTDQVL-NEXT: retq
5733 ; BITALG_NOVLX-LABEL: ult_15_v8i16:
5734 ; BITALG_NOVLX: # %bb.0:
5735 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5736 ; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
5737 ; BITALG_NOVLX-NEXT: vpbroadcastw {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5738 ; BITALG_NOVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
5739 ; BITALG_NOVLX-NEXT: vzeroupper
5740 ; BITALG_NOVLX-NEXT: retq
5742 ; BITALG-LABEL: ult_15_v8i16:
5744 ; BITALG-NEXT: vpopcntw %xmm0, %xmm0
5745 ; BITALG-NEXT: vpbroadcastw {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15]
5746 ; BITALG-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; IR under test - ctpop, unsigned compare against a splat of 15, sext to a lane mask.
5748 %2 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
5749 %3 = icmp ult <8 x i16> %2, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
5750 %4 = sext <8 x i1> %3 to <8 x i16>
; ugt_1_v4i32 - produces a per-lane mask of the i32 lanes of %0 whose population
; count is greater than 1 (unsigned icmp on the ctpop result, with the i1 result
; sign-extended back to i32).
; The assertions below are autogenerated (see the file header); regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; Lowering recorded by the assertions:
;   - the SSE, AVX1, AVX2 and BITALG configurations contain no popcount sequence;
;     they compute x & (x + all-ones), compare the result for equality with
;     zero, and then invert that mask (pxor/vpxor with all-ones, or
;     vpternlogq $15 in the BITALG configurations);
;   - the AVX512VPOPCNTDQ configurations use vpopcntd followed by vpcmpgtd against
;     a broadcast of 1 (widened to zmm when VL is not enabled).
5754 define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
5755 ; SSE-LABEL: ugt_1_v4i32:
5757 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
5758 ; SSE-NEXT: movdqa %xmm0, %xmm2
5759 ; SSE-NEXT: paddd %xmm1, %xmm2
5760 ; SSE-NEXT: pand %xmm2, %xmm0
5761 ; SSE-NEXT: pxor %xmm2, %xmm2
5762 ; SSE-NEXT: pcmpeqd %xmm2, %xmm0
5763 ; SSE-NEXT: pxor %xmm1, %xmm0
5766 ; AVX1-LABEL: ugt_1_v4i32:
5768 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5769 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm2
5770 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
5771 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
5772 ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
5773 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
5776 ; AVX2-LABEL: ugt_1_v4i32:
5778 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5779 ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm2
5780 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
5781 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
5782 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
5783 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
5786 ; AVX512VPOPCNTDQ-LABEL: ugt_1_v4i32:
5787 ; AVX512VPOPCNTDQ: # %bb.0:
5788 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5789 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
5790 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
5791 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
5792 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
5793 ; AVX512VPOPCNTDQ-NEXT: retq
5795 ; AVX512VPOPCNTDQVL-LABEL: ugt_1_v4i32:
5796 ; AVX512VPOPCNTDQVL: # %bb.0:
5797 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
5798 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
5799 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
5800 ; AVX512VPOPCNTDQVL-NEXT: retq
5802 ; BITALG_NOVLX-LABEL: ugt_1_v4i32:
5803 ; BITALG_NOVLX: # %bb.0:
5804 ; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5805 ; BITALG_NOVLX-NEXT: vpaddd %xmm1, %xmm0, %xmm1
5806 ; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
5807 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
5808 ; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
5809 ; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
5810 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
5811 ; BITALG_NOVLX-NEXT: vzeroupper
5812 ; BITALG_NOVLX-NEXT: retq
5814 ; BITALG-LABEL: ugt_1_v4i32:
5816 ; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5817 ; BITALG-NEXT: vpaddd %xmm1, %xmm0, %xmm1
5818 ; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
5819 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
5820 ; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
5821 ; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
; IR under test - ctpop, unsigned compare against a splat of 1, sext to a lane mask.
5823 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
5824 %3 = icmp ugt <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
5825 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_2_v4i32 - produces a per-lane mask of the i32 lanes of %0 whose population
; count is less than 2 (unsigned icmp on the ctpop result, with the i1 result
; sign-extended back to i32).
; The assertions below are autogenerated (see the file header); regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; Lowering recorded by the assertions:
;   - the SSE, AVX1, AVX2 and BITALG configurations contain no popcount sequence;
;     they compute x & (x + all-ones) and compare the result for equality with
;     zero, which is used directly as the result mask;
;   - the AVX512VPOPCNTDQ configurations use vpopcntd followed by vpcmpgtd
;     computing "2 > popcount" against a broadcast of 2 (widened to zmm when VL
;     is not enabled).
5829 define <4 x i32> @ult_2_v4i32(<4 x i32> %0) {
5830 ; SSE-LABEL: ult_2_v4i32:
5832 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
5833 ; SSE-NEXT: paddd %xmm0, %xmm1
5834 ; SSE-NEXT: pand %xmm1, %xmm0
5835 ; SSE-NEXT: pxor %xmm1, %xmm1
5836 ; SSE-NEXT: pcmpeqd %xmm1, %xmm0
5839 ; AVX1-LABEL: ult_2_v4i32:
5841 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5842 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
5843 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
5844 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
5845 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
5848 ; AVX2-LABEL: ult_2_v4i32:
5850 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5851 ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
5852 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
5853 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
5854 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
5857 ; AVX512VPOPCNTDQ-LABEL: ult_2_v4i32:
5858 ; AVX512VPOPCNTDQ: # %bb.0:
5859 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
5860 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
5861 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
5862 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
5863 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
5864 ; AVX512VPOPCNTDQ-NEXT: retq
5866 ; AVX512VPOPCNTDQVL-LABEL: ult_2_v4i32:
5867 ; AVX512VPOPCNTDQVL: # %bb.0:
5868 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
5869 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
5870 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
5871 ; AVX512VPOPCNTDQVL-NEXT: retq
5873 ; BITALG_NOVLX-LABEL: ult_2_v4i32:
5874 ; BITALG_NOVLX: # %bb.0:
5875 ; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5876 ; BITALG_NOVLX-NEXT: vpaddd %xmm1, %xmm0, %xmm1
5877 ; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
5878 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
5879 ; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
5880 ; BITALG_NOVLX-NEXT: retq
5882 ; BITALG-LABEL: ult_2_v4i32:
5884 ; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
5885 ; BITALG-NEXT: vpaddd %xmm1, %xmm0, %xmm1
5886 ; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
5887 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
5888 ; BITALG-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; IR under test - ctpop, unsigned compare against a splat of 2, sext to a lane mask.
5890 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
5891 %3 = icmp ult <4 x i32> %2, <i32 2, i32 2, i32 2, i32 2>
5892 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_2_v4i32 - produces a per-lane mask of the i32 lanes of %0 whose population
; count is greater than 2 (unsigned icmp on the ctpop result, with the i1 result
; sign-extended back to i32).
; The assertions below are autogenerated (see the file header); regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
; Lowering recorded by the assertions:
;   - the SSE2 and SSE3 configurations count bits per byte with shift/mask/add
;     steps; the SSSE3, SSE41, AVX1 and AVX2 configurations use a pshufb lookup on
;     each nibble with the table [0,1,1,2,...,4]; the BITALG configurations use
;     vpopcntb;
;   - in all of those, the per-byte counts are then spread so each dword sits in
;     its own qword (punpckhdq plus punpckldq or pmovzxdq with zero), summed with
;     psadbw against zero, and packed back with packuswb;
;   - the AVX512VPOPCNTDQ configurations use vpopcntd directly (widened to zmm
;     when VL is not enabled);
;   - the final compare is pcmpgtd/vpcmpgtd against 2, taken either from a
;     constant-pool operand or from a vpbroadcastd register depending on the
;     configuration.
5896 define <4 x i32> @ugt_2_v4i32(<4 x i32> %0) {
5897 ; SSE2-LABEL: ugt_2_v4i32:
5899 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5900 ; SSE2-NEXT: psrlw $1, %xmm1
5901 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5902 ; SSE2-NEXT: psubb %xmm1, %xmm0
5903 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5904 ; SSE2-NEXT: movdqa %xmm0, %xmm2
5905 ; SSE2-NEXT: pand %xmm1, %xmm2
5906 ; SSE2-NEXT: psrlw $2, %xmm0
5907 ; SSE2-NEXT: pand %xmm1, %xmm0
5908 ; SSE2-NEXT: paddb %xmm2, %xmm0
5909 ; SSE2-NEXT: movdqa %xmm0, %xmm1
5910 ; SSE2-NEXT: psrlw $4, %xmm1
5911 ; SSE2-NEXT: paddb %xmm1, %xmm0
5912 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5913 ; SSE2-NEXT: pxor %xmm1, %xmm1
5914 ; SSE2-NEXT: movdqa %xmm0, %xmm2
5915 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
5916 ; SSE2-NEXT: psadbw %xmm1, %xmm2
5917 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5918 ; SSE2-NEXT: psadbw %xmm1, %xmm0
5919 ; SSE2-NEXT: packuswb %xmm2, %xmm0
5920 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5923 ; SSE3-LABEL: ugt_2_v4i32:
5925 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5926 ; SSE3-NEXT: psrlw $1, %xmm1
5927 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5928 ; SSE3-NEXT: psubb %xmm1, %xmm0
5929 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
5930 ; SSE3-NEXT: movdqa %xmm0, %xmm2
5931 ; SSE3-NEXT: pand %xmm1, %xmm2
5932 ; SSE3-NEXT: psrlw $2, %xmm0
5933 ; SSE3-NEXT: pand %xmm1, %xmm0
5934 ; SSE3-NEXT: paddb %xmm2, %xmm0
5935 ; SSE3-NEXT: movdqa %xmm0, %xmm1
5936 ; SSE3-NEXT: psrlw $4, %xmm1
5937 ; SSE3-NEXT: paddb %xmm1, %xmm0
5938 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5939 ; SSE3-NEXT: pxor %xmm1, %xmm1
5940 ; SSE3-NEXT: movdqa %xmm0, %xmm2
5941 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
5942 ; SSE3-NEXT: psadbw %xmm1, %xmm2
5943 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5944 ; SSE3-NEXT: psadbw %xmm1, %xmm0
5945 ; SSE3-NEXT: packuswb %xmm2, %xmm0
5946 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5949 ; SSSE3-LABEL: ugt_2_v4i32:
5951 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5952 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
5953 ; SSSE3-NEXT: pand %xmm2, %xmm3
5954 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5955 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
5956 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
5957 ; SSSE3-NEXT: psrlw $4, %xmm0
5958 ; SSSE3-NEXT: pand %xmm2, %xmm0
5959 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
5960 ; SSSE3-NEXT: paddb %xmm4, %xmm1
5961 ; SSSE3-NEXT: pxor %xmm0, %xmm0
5962 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
5963 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
5964 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
5965 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
5966 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
5967 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
5968 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
5969 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
5972 ; SSE41-LABEL: ugt_2_v4i32:
5974 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5975 ; SSE41-NEXT: movdqa %xmm0, %xmm2
5976 ; SSE41-NEXT: pand %xmm1, %xmm2
5977 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5978 ; SSE41-NEXT: movdqa %xmm3, %xmm4
5979 ; SSE41-NEXT: pshufb %xmm2, %xmm4
5980 ; SSE41-NEXT: psrlw $4, %xmm0
5981 ; SSE41-NEXT: pand %xmm1, %xmm0
5982 ; SSE41-NEXT: pshufb %xmm0, %xmm3
5983 ; SSE41-NEXT: paddb %xmm4, %xmm3
5984 ; SSE41-NEXT: pxor %xmm1, %xmm1
5985 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
5986 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
5987 ; SSE41-NEXT: psadbw %xmm1, %xmm3
5988 ; SSE41-NEXT: psadbw %xmm1, %xmm0
5989 ; SSE41-NEXT: packuswb %xmm3, %xmm0
5990 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
5993 ; AVX1-LABEL: ugt_2_v4i32:
5995 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
5996 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
5997 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
5998 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
5999 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
6000 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
6001 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6002 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6003 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
6004 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6005 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6006 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6007 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6008 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6009 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
6012 ; AVX2-LABEL: ugt_2_v4i32:
6014 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6015 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
6016 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6017 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6018 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
6019 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
6020 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6021 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6022 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
6023 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6024 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6025 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6026 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6027 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6028 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
6029 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6032 ; AVX512VPOPCNTDQ-LABEL: ugt_2_v4i32:
6033 ; AVX512VPOPCNTDQ: # %bb.0:
6034 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6035 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
6036 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
6037 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6038 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
6039 ; AVX512VPOPCNTDQ-NEXT: retq
6041 ; AVX512VPOPCNTDQVL-LABEL: ugt_2_v4i32:
6042 ; AVX512VPOPCNTDQVL: # %bb.0:
6043 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
6044 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
6045 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6046 ; AVX512VPOPCNTDQVL-NEXT: retq
6048 ; BITALG_NOVLX-LABEL: ugt_2_v4i32:
6049 ; BITALG_NOVLX: # %bb.0:
6050 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6051 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
6052 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
6053 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6054 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6055 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6056 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6057 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6058 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
6059 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6060 ; BITALG_NOVLX-NEXT: vzeroupper
6061 ; BITALG_NOVLX-NEXT: retq
6063 ; BITALG-LABEL: ugt_2_v4i32:
6065 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
6066 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
6067 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6068 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6069 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6070 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6071 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6072 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
6073 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; IR under test - ctpop, unsigned compare against a splat of 2, sext to a lane mask.
6075 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
6076 %3 = icmp ugt <4 x i32> %2, <i32 2, i32 2, i32 2, i32 2>
6077 %4 = sext <4 x i1> %3 to <4 x i32>
6081 define <4 x i32> @ult_3_v4i32(<4 x i32> %0) {
6082 ; SSE2-LABEL: ult_3_v4i32:
6084 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6085 ; SSE2-NEXT: psrlw $1, %xmm1
6086 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6087 ; SSE2-NEXT: psubb %xmm1, %xmm0
6088 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6089 ; SSE2-NEXT: movdqa %xmm0, %xmm2
6090 ; SSE2-NEXT: pand %xmm1, %xmm2
6091 ; SSE2-NEXT: psrlw $2, %xmm0
6092 ; SSE2-NEXT: pand %xmm1, %xmm0
6093 ; SSE2-NEXT: paddb %xmm2, %xmm0
6094 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6095 ; SSE2-NEXT: psrlw $4, %xmm1
6096 ; SSE2-NEXT: paddb %xmm0, %xmm1
6097 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6098 ; SSE2-NEXT: pxor %xmm0, %xmm0
6099 ; SSE2-NEXT: movdqa %xmm1, %xmm2
6100 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6101 ; SSE2-NEXT: psadbw %xmm0, %xmm2
6102 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6103 ; SSE2-NEXT: psadbw %xmm0, %xmm1
6104 ; SSE2-NEXT: packuswb %xmm2, %xmm1
6105 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3]
6106 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
6109 ; SSE3-LABEL: ult_3_v4i32:
6111 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6112 ; SSE3-NEXT: psrlw $1, %xmm1
6113 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6114 ; SSE3-NEXT: psubb %xmm1, %xmm0
6115 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6116 ; SSE3-NEXT: movdqa %xmm0, %xmm2
6117 ; SSE3-NEXT: pand %xmm1, %xmm2
6118 ; SSE3-NEXT: psrlw $2, %xmm0
6119 ; SSE3-NEXT: pand %xmm1, %xmm0
6120 ; SSE3-NEXT: paddb %xmm2, %xmm0
6121 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6122 ; SSE3-NEXT: psrlw $4, %xmm1
6123 ; SSE3-NEXT: paddb %xmm0, %xmm1
6124 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6125 ; SSE3-NEXT: pxor %xmm0, %xmm0
6126 ; SSE3-NEXT: movdqa %xmm1, %xmm2
6127 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6128 ; SSE3-NEXT: psadbw %xmm0, %xmm2
6129 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6130 ; SSE3-NEXT: psadbw %xmm0, %xmm1
6131 ; SSE3-NEXT: packuswb %xmm2, %xmm1
6132 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3]
6133 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
6136 ; SSSE3-LABEL: ult_3_v4i32:
6138 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6139 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
6140 ; SSSE3-NEXT: pand %xmm1, %xmm2
6141 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6142 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
6143 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
6144 ; SSSE3-NEXT: psrlw $4, %xmm0
6145 ; SSSE3-NEXT: pand %xmm1, %xmm0
6146 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
6147 ; SSSE3-NEXT: paddb %xmm4, %xmm3
6148 ; SSSE3-NEXT: pxor %xmm0, %xmm0
6149 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
6150 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
6151 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
6152 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
6153 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
6154 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
6155 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3]
6156 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
6159 ; SSE41-LABEL: ult_3_v4i32:
6161 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6162 ; SSE41-NEXT: movdqa %xmm0, %xmm2
6163 ; SSE41-NEXT: pand %xmm1, %xmm2
6164 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6165 ; SSE41-NEXT: movdqa %xmm3, %xmm4
6166 ; SSE41-NEXT: pshufb %xmm2, %xmm4
6167 ; SSE41-NEXT: psrlw $4, %xmm0
6168 ; SSE41-NEXT: pand %xmm1, %xmm0
6169 ; SSE41-NEXT: pshufb %xmm0, %xmm3
6170 ; SSE41-NEXT: paddb %xmm4, %xmm3
6171 ; SSE41-NEXT: pxor %xmm0, %xmm0
6172 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
6173 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
6174 ; SSE41-NEXT: psadbw %xmm0, %xmm3
6175 ; SSE41-NEXT: psadbw %xmm0, %xmm1
6176 ; SSE41-NEXT: packuswb %xmm3, %xmm1
6177 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [3,3,3,3]
6178 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
6181 ; AVX1-LABEL: ult_3_v4i32:
6183 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6184 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
6185 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6186 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6187 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
6188 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
6189 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6190 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6191 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
6192 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6193 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6194 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6195 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6196 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6197 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [3,3,3,3]
6198 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6201 ; AVX2-LABEL: ult_3_v4i32:
6203 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6204 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
6205 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6206 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6207 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
6208 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
6209 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6210 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6211 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
6212 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6213 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6214 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6215 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6216 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6217 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6218 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6221 ; AVX512VPOPCNTDQ-LABEL: ult_3_v4i32:
6222 ; AVX512VPOPCNTDQ: # %bb.0:
6223 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6224 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
6225 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6226 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6227 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
6228 ; AVX512VPOPCNTDQ-NEXT: retq
6230 ; AVX512VPOPCNTDQVL-LABEL: ult_3_v4i32:
6231 ; AVX512VPOPCNTDQVL: # %bb.0:
6232 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
6233 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6234 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6235 ; AVX512VPOPCNTDQVL-NEXT: retq
6237 ; BITALG_NOVLX-LABEL: ult_3_v4i32:
6238 ; BITALG_NOVLX: # %bb.0:
6239 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6240 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
6241 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
6242 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6243 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6244 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6245 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6246 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6247 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6248 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6249 ; BITALG_NOVLX-NEXT: vzeroupper
6250 ; BITALG_NOVLX-NEXT: retq
6252 ; BITALG-LABEL: ult_3_v4i32:
6254 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
6255 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
6256 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6257 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6258 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6259 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6260 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6261 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6262 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6264 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
6265 %3 = icmp ult <4 x i32> %2, <i32 3, i32 3, i32 3, i32 3>
6266 %4 = sext <4 x i1> %3 to <4 x i32>
; Per-lane test of ctpop(%0) > 3 (unsigned) on <4 x i32>. The IR at the end of
; this function calls @llvm.ctpop.v4i32, applies icmp ugt against a splat of 3,
; and sign-extends the <4 x i1> result to <4 x i32> (all-ones or zero per lane).
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
6270 define <4 x i32> @ugt_3_v4i32(<4 x i32> %0) {
6271 ; SSE2-LABEL: ugt_3_v4i32:
6273 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6274 ; SSE2-NEXT: psrlw $1, %xmm1
6275 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6276 ; SSE2-NEXT: psubb %xmm1, %xmm0
6277 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6278 ; SSE2-NEXT: movdqa %xmm0, %xmm2
6279 ; SSE2-NEXT: pand %xmm1, %xmm2
6280 ; SSE2-NEXT: psrlw $2, %xmm0
6281 ; SSE2-NEXT: pand %xmm1, %xmm0
6282 ; SSE2-NEXT: paddb %xmm2, %xmm0
6283 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6284 ; SSE2-NEXT: psrlw $4, %xmm1
6285 ; SSE2-NEXT: paddb %xmm1, %xmm0
6286 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6287 ; SSE2-NEXT: pxor %xmm1, %xmm1
6288 ; SSE2-NEXT: movdqa %xmm0, %xmm2
6289 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
6290 ; SSE2-NEXT: psadbw %xmm1, %xmm2
6291 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
6292 ; SSE2-NEXT: psadbw %xmm1, %xmm0
6293 ; SSE2-NEXT: packuswb %xmm2, %xmm0
6294 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6297 ; SSE3-LABEL: ugt_3_v4i32:
6299 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6300 ; SSE3-NEXT: psrlw $1, %xmm1
6301 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6302 ; SSE3-NEXT: psubb %xmm1, %xmm0
6303 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6304 ; SSE3-NEXT: movdqa %xmm0, %xmm2
6305 ; SSE3-NEXT: pand %xmm1, %xmm2
6306 ; SSE3-NEXT: psrlw $2, %xmm0
6307 ; SSE3-NEXT: pand %xmm1, %xmm0
6308 ; SSE3-NEXT: paddb %xmm2, %xmm0
6309 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6310 ; SSE3-NEXT: psrlw $4, %xmm1
6311 ; SSE3-NEXT: paddb %xmm1, %xmm0
6312 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6313 ; SSE3-NEXT: pxor %xmm1, %xmm1
6314 ; SSE3-NEXT: movdqa %xmm0, %xmm2
6315 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
6316 ; SSE3-NEXT: psadbw %xmm1, %xmm2
6317 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
6318 ; SSE3-NEXT: psadbw %xmm1, %xmm0
6319 ; SSE3-NEXT: packuswb %xmm2, %xmm0
6320 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6323 ; SSSE3-LABEL: ugt_3_v4i32:
6325 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6326 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
6327 ; SSSE3-NEXT: pand %xmm2, %xmm3
6328 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6329 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
6330 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
6331 ; SSSE3-NEXT: psrlw $4, %xmm0
6332 ; SSSE3-NEXT: pand %xmm2, %xmm0
6333 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
6334 ; SSSE3-NEXT: paddb %xmm4, %xmm1
6335 ; SSSE3-NEXT: pxor %xmm0, %xmm0
6336 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
6337 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6338 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
6339 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6340 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
6341 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
6342 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6343 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
6346 ; SSE41-LABEL: ugt_3_v4i32:
6348 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6349 ; SSE41-NEXT: movdqa %xmm0, %xmm2
6350 ; SSE41-NEXT: pand %xmm1, %xmm2
6351 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6352 ; SSE41-NEXT: movdqa %xmm3, %xmm4
6353 ; SSE41-NEXT: pshufb %xmm2, %xmm4
6354 ; SSE41-NEXT: psrlw $4, %xmm0
6355 ; SSE41-NEXT: pand %xmm1, %xmm0
6356 ; SSE41-NEXT: pshufb %xmm0, %xmm3
6357 ; SSE41-NEXT: paddb %xmm4, %xmm3
6358 ; SSE41-NEXT: pxor %xmm1, %xmm1
6359 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
6360 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
6361 ; SSE41-NEXT: psadbw %xmm1, %xmm3
6362 ; SSE41-NEXT: psadbw %xmm1, %xmm0
6363 ; SSE41-NEXT: packuswb %xmm3, %xmm0
6364 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6367 ; AVX1-LABEL: ugt_3_v4i32:
6369 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6370 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
6371 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6372 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6373 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
6374 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
6375 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6376 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6377 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
6378 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6379 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6380 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6381 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6382 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6383 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
6386 ; AVX2-LABEL: ugt_3_v4i32:
6388 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6389 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
6390 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6391 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6392 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
6393 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
6394 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6395 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6396 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
6397 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6398 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6399 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6400 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6401 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6402 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6403 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6406 ; AVX512VPOPCNTDQ-LABEL: ugt_3_v4i32:
6407 ; AVX512VPOPCNTDQ: # %bb.0:
6408 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6409 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
6410 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6411 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6412 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
6413 ; AVX512VPOPCNTDQ-NEXT: retq
6415 ; AVX512VPOPCNTDQVL-LABEL: ugt_3_v4i32:
6416 ; AVX512VPOPCNTDQVL: # %bb.0:
6417 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
6418 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6419 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6420 ; AVX512VPOPCNTDQVL-NEXT: retq
6422 ; BITALG_NOVLX-LABEL: ugt_3_v4i32:
6423 ; BITALG_NOVLX: # %bb.0:
6424 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6425 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
6426 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
6427 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6428 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6429 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6430 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6431 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6432 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6433 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6434 ; BITALG_NOVLX-NEXT: vzeroupper
6435 ; BITALG_NOVLX-NEXT: retq
6437 ; BITALG-LABEL: ugt_3_v4i32:
6439 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
6440 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
6441 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6442 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6443 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6444 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6445 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6446 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3,3,3,3]
6447 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6449 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
6450 %3 = icmp ugt <4 x i32> %2, <i32 3, i32 3, i32 3, i32 3>
6451 %4 = sext <4 x i1> %3 to <4 x i32>
; Per-lane test of ctpop(%0) < 4 (unsigned) on <4 x i32>. The IR at the end of
; this function calls @llvm.ctpop.v4i32, applies icmp ult against a splat of 4,
; and sign-extends the <4 x i1> result to <4 x i32> (all-ones or zero per lane).
; In every checked configuration the final compare is a (v)pcmpgtd of the
; [4,4,4,4] constant against the computed population count.
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
6455 define <4 x i32> @ult_4_v4i32(<4 x i32> %0) {
6456 ; SSE2-LABEL: ult_4_v4i32:
6458 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6459 ; SSE2-NEXT: psrlw $1, %xmm1
6460 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6461 ; SSE2-NEXT: psubb %xmm1, %xmm0
6462 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6463 ; SSE2-NEXT: movdqa %xmm0, %xmm2
6464 ; SSE2-NEXT: pand %xmm1, %xmm2
6465 ; SSE2-NEXT: psrlw $2, %xmm0
6466 ; SSE2-NEXT: pand %xmm1, %xmm0
6467 ; SSE2-NEXT: paddb %xmm2, %xmm0
6468 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6469 ; SSE2-NEXT: psrlw $4, %xmm1
6470 ; SSE2-NEXT: paddb %xmm0, %xmm1
6471 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6472 ; SSE2-NEXT: pxor %xmm0, %xmm0
6473 ; SSE2-NEXT: movdqa %xmm1, %xmm2
6474 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6475 ; SSE2-NEXT: psadbw %xmm0, %xmm2
6476 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6477 ; SSE2-NEXT: psadbw %xmm0, %xmm1
6478 ; SSE2-NEXT: packuswb %xmm2, %xmm1
6479 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4]
6480 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
6483 ; SSE3-LABEL: ult_4_v4i32:
6485 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6486 ; SSE3-NEXT: psrlw $1, %xmm1
6487 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6488 ; SSE3-NEXT: psubb %xmm1, %xmm0
6489 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6490 ; SSE3-NEXT: movdqa %xmm0, %xmm2
6491 ; SSE3-NEXT: pand %xmm1, %xmm2
6492 ; SSE3-NEXT: psrlw $2, %xmm0
6493 ; SSE3-NEXT: pand %xmm1, %xmm0
6494 ; SSE3-NEXT: paddb %xmm2, %xmm0
6495 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6496 ; SSE3-NEXT: psrlw $4, %xmm1
6497 ; SSE3-NEXT: paddb %xmm0, %xmm1
6498 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6499 ; SSE3-NEXT: pxor %xmm0, %xmm0
6500 ; SSE3-NEXT: movdqa %xmm1, %xmm2
6501 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6502 ; SSE3-NEXT: psadbw %xmm0, %xmm2
6503 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6504 ; SSE3-NEXT: psadbw %xmm0, %xmm1
6505 ; SSE3-NEXT: packuswb %xmm2, %xmm1
6506 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4]
6507 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
6510 ; SSSE3-LABEL: ult_4_v4i32:
6512 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6513 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
6514 ; SSSE3-NEXT: pand %xmm1, %xmm2
6515 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6516 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
6517 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
6518 ; SSSE3-NEXT: psrlw $4, %xmm0
6519 ; SSSE3-NEXT: pand %xmm1, %xmm0
6520 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
6521 ; SSSE3-NEXT: paddb %xmm4, %xmm3
6522 ; SSSE3-NEXT: pxor %xmm0, %xmm0
6523 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
6524 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
6525 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
6526 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
6527 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
6528 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
6529 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4]
6530 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
6533 ; SSE41-LABEL: ult_4_v4i32:
6535 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6536 ; SSE41-NEXT: movdqa %xmm0, %xmm2
6537 ; SSE41-NEXT: pand %xmm1, %xmm2
6538 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6539 ; SSE41-NEXT: movdqa %xmm3, %xmm4
6540 ; SSE41-NEXT: pshufb %xmm2, %xmm4
6541 ; SSE41-NEXT: psrlw $4, %xmm0
6542 ; SSE41-NEXT: pand %xmm1, %xmm0
6543 ; SSE41-NEXT: pshufb %xmm0, %xmm3
6544 ; SSE41-NEXT: paddb %xmm4, %xmm3
6545 ; SSE41-NEXT: pxor %xmm0, %xmm0
6546 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
6547 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
6548 ; SSE41-NEXT: psadbw %xmm0, %xmm3
6549 ; SSE41-NEXT: psadbw %xmm0, %xmm1
6550 ; SSE41-NEXT: packuswb %xmm3, %xmm1
6551 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [4,4,4,4]
6552 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
6555 ; AVX1-LABEL: ult_4_v4i32:
6557 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6558 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
6559 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6560 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6561 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
6562 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
6563 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6564 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6565 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
6566 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6567 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6568 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6569 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6570 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6571 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [4,4,4,4]
6572 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6575 ; AVX2-LABEL: ult_4_v4i32:
6577 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6578 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
6579 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6580 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6581 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
6582 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
6583 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6584 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6585 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
6586 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6587 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6588 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6589 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6590 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6591 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6592 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6595 ; AVX512VPOPCNTDQ-LABEL: ult_4_v4i32:
6596 ; AVX512VPOPCNTDQ: # %bb.0:
6597 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6598 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
6599 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6600 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6601 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
6602 ; AVX512VPOPCNTDQ-NEXT: retq
6604 ; AVX512VPOPCNTDQVL-LABEL: ult_4_v4i32:
6605 ; AVX512VPOPCNTDQVL: # %bb.0:
6606 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
6607 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6608 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6609 ; AVX512VPOPCNTDQVL-NEXT: retq
6611 ; BITALG_NOVLX-LABEL: ult_4_v4i32:
6612 ; BITALG_NOVLX: # %bb.0:
6613 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6614 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
6615 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
6616 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6617 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6618 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6619 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6620 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6621 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6622 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6623 ; BITALG_NOVLX-NEXT: vzeroupper
6624 ; BITALG_NOVLX-NEXT: retq
6626 ; BITALG-LABEL: ult_4_v4i32:
6628 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
6629 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
6630 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6631 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6632 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6633 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6634 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6635 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6636 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6638 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
6639 %3 = icmp ult <4 x i32> %2, <i32 4, i32 4, i32 4, i32 4>
6640 %4 = sext <4 x i1> %3 to <4 x i32>
; Per-lane test of ctpop(%0) > 4 (unsigned) on <4 x i32>. The IR at the end of
; this function calls @llvm.ctpop.v4i32, applies icmp ugt against a splat of 4,
; and sign-extends the <4 x i1> result to <4 x i32> (all-ones or zero per lane).
; The check lines are autogenerated (see the note at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
6644 define <4 x i32> @ugt_4_v4i32(<4 x i32> %0) {
6645 ; SSE2-LABEL: ugt_4_v4i32:
6647 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6648 ; SSE2-NEXT: psrlw $1, %xmm1
6649 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6650 ; SSE2-NEXT: psubb %xmm1, %xmm0
6651 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6652 ; SSE2-NEXT: movdqa %xmm0, %xmm2
6653 ; SSE2-NEXT: pand %xmm1, %xmm2
6654 ; SSE2-NEXT: psrlw $2, %xmm0
6655 ; SSE2-NEXT: pand %xmm1, %xmm0
6656 ; SSE2-NEXT: paddb %xmm2, %xmm0
6657 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6658 ; SSE2-NEXT: psrlw $4, %xmm1
6659 ; SSE2-NEXT: paddb %xmm1, %xmm0
6660 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6661 ; SSE2-NEXT: pxor %xmm1, %xmm1
6662 ; SSE2-NEXT: movdqa %xmm0, %xmm2
6663 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
6664 ; SSE2-NEXT: psadbw %xmm1, %xmm2
6665 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
6666 ; SSE2-NEXT: psadbw %xmm1, %xmm0
6667 ; SSE2-NEXT: packuswb %xmm2, %xmm0
6668 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6671 ; SSE3-LABEL: ugt_4_v4i32:
6673 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6674 ; SSE3-NEXT: psrlw $1, %xmm1
6675 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6676 ; SSE3-NEXT: psubb %xmm1, %xmm0
6677 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6678 ; SSE3-NEXT: movdqa %xmm0, %xmm2
6679 ; SSE3-NEXT: pand %xmm1, %xmm2
6680 ; SSE3-NEXT: psrlw $2, %xmm0
6681 ; SSE3-NEXT: pand %xmm1, %xmm0
6682 ; SSE3-NEXT: paddb %xmm2, %xmm0
6683 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6684 ; SSE3-NEXT: psrlw $4, %xmm1
6685 ; SSE3-NEXT: paddb %xmm1, %xmm0
6686 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6687 ; SSE3-NEXT: pxor %xmm1, %xmm1
6688 ; SSE3-NEXT: movdqa %xmm0, %xmm2
6689 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
6690 ; SSE3-NEXT: psadbw %xmm1, %xmm2
6691 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
6692 ; SSE3-NEXT: psadbw %xmm1, %xmm0
6693 ; SSE3-NEXT: packuswb %xmm2, %xmm0
6694 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6697 ; SSSE3-LABEL: ugt_4_v4i32:
6699 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6700 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
6701 ; SSSE3-NEXT: pand %xmm2, %xmm3
6702 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6703 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
6704 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
6705 ; SSSE3-NEXT: psrlw $4, %xmm0
6706 ; SSSE3-NEXT: pand %xmm2, %xmm0
6707 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
6708 ; SSSE3-NEXT: paddb %xmm4, %xmm1
6709 ; SSSE3-NEXT: pxor %xmm0, %xmm0
6710 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
6711 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6712 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
6713 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6714 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
6715 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
6716 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6717 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
6720 ; SSE41-LABEL: ugt_4_v4i32:
6722 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6723 ; SSE41-NEXT: movdqa %xmm0, %xmm2
6724 ; SSE41-NEXT: pand %xmm1, %xmm2
6725 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6726 ; SSE41-NEXT: movdqa %xmm3, %xmm4
6727 ; SSE41-NEXT: pshufb %xmm2, %xmm4
6728 ; SSE41-NEXT: psrlw $4, %xmm0
6729 ; SSE41-NEXT: pand %xmm1, %xmm0
6730 ; SSE41-NEXT: pshufb %xmm0, %xmm3
6731 ; SSE41-NEXT: paddb %xmm4, %xmm3
6732 ; SSE41-NEXT: pxor %xmm1, %xmm1
6733 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
6734 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
6735 ; SSE41-NEXT: psadbw %xmm1, %xmm3
6736 ; SSE41-NEXT: psadbw %xmm1, %xmm0
6737 ; SSE41-NEXT: packuswb %xmm3, %xmm0
6738 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
6741 ; AVX1-LABEL: ugt_4_v4i32:
6743 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6744 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
6745 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6746 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6747 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
6748 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
6749 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6750 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6751 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
6752 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6753 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6754 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6755 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6756 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6757 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
6760 ; AVX2-LABEL: ugt_4_v4i32:
6762 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6763 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
6764 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6765 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6766 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
6767 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
6768 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6769 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6770 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
6771 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6772 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6773 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6774 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6775 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6776 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6777 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6780 ; AVX512VPOPCNTDQ-LABEL: ugt_4_v4i32:
6781 ; AVX512VPOPCNTDQ: # %bb.0:
6782 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6783 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
6784 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6785 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6786 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
6787 ; AVX512VPOPCNTDQ-NEXT: retq
6789 ; AVX512VPOPCNTDQVL-LABEL: ugt_4_v4i32:
6790 ; AVX512VPOPCNTDQVL: # %bb.0:
6791 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
6792 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6793 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6794 ; AVX512VPOPCNTDQVL-NEXT: retq
6796 ; BITALG_NOVLX-LABEL: ugt_4_v4i32:
6797 ; BITALG_NOVLX: # %bb.0:
6798 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6799 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
6800 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
6801 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6802 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6803 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6804 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6805 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6806 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6807 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6808 ; BITALG_NOVLX-NEXT: vzeroupper
6809 ; BITALG_NOVLX-NEXT: retq
6811 ; BITALG-LABEL: ugt_4_v4i32:
6813 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
6814 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
6815 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6816 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6817 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6818 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6819 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6820 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
6821 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
6823 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
6824 %3 = icmp ugt <4 x i32> %2, <i32 4, i32 4, i32 4, i32 4>
6825 %4 = sext <4 x i1> %3 to <4 x i32>
6829 define <4 x i32> @ult_5_v4i32(<4 x i32> %0) {
6830 ; SSE2-LABEL: ult_5_v4i32:
6832 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6833 ; SSE2-NEXT: psrlw $1, %xmm1
6834 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6835 ; SSE2-NEXT: psubb %xmm1, %xmm0
6836 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6837 ; SSE2-NEXT: movdqa %xmm0, %xmm2
6838 ; SSE2-NEXT: pand %xmm1, %xmm2
6839 ; SSE2-NEXT: psrlw $2, %xmm0
6840 ; SSE2-NEXT: pand %xmm1, %xmm0
6841 ; SSE2-NEXT: paddb %xmm2, %xmm0
6842 ; SSE2-NEXT: movdqa %xmm0, %xmm1
6843 ; SSE2-NEXT: psrlw $4, %xmm1
6844 ; SSE2-NEXT: paddb %xmm0, %xmm1
6845 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6846 ; SSE2-NEXT: pxor %xmm0, %xmm0
6847 ; SSE2-NEXT: movdqa %xmm1, %xmm2
6848 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6849 ; SSE2-NEXT: psadbw %xmm0, %xmm2
6850 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6851 ; SSE2-NEXT: psadbw %xmm0, %xmm1
6852 ; SSE2-NEXT: packuswb %xmm2, %xmm1
6853 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5]
6854 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
6857 ; SSE3-LABEL: ult_5_v4i32:
6859 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6860 ; SSE3-NEXT: psrlw $1, %xmm1
6861 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6862 ; SSE3-NEXT: psubb %xmm1, %xmm0
6863 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
6864 ; SSE3-NEXT: movdqa %xmm0, %xmm2
6865 ; SSE3-NEXT: pand %xmm1, %xmm2
6866 ; SSE3-NEXT: psrlw $2, %xmm0
6867 ; SSE3-NEXT: pand %xmm1, %xmm0
6868 ; SSE3-NEXT: paddb %xmm2, %xmm0
6869 ; SSE3-NEXT: movdqa %xmm0, %xmm1
6870 ; SSE3-NEXT: psrlw $4, %xmm1
6871 ; SSE3-NEXT: paddb %xmm0, %xmm1
6872 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
6873 ; SSE3-NEXT: pxor %xmm0, %xmm0
6874 ; SSE3-NEXT: movdqa %xmm1, %xmm2
6875 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
6876 ; SSE3-NEXT: psadbw %xmm0, %xmm2
6877 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
6878 ; SSE3-NEXT: psadbw %xmm0, %xmm1
6879 ; SSE3-NEXT: packuswb %xmm2, %xmm1
6880 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5]
6881 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
6884 ; SSSE3-LABEL: ult_5_v4i32:
6886 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6887 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
6888 ; SSSE3-NEXT: pand %xmm1, %xmm2
6889 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6890 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
6891 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
6892 ; SSSE3-NEXT: psrlw $4, %xmm0
6893 ; SSSE3-NEXT: pand %xmm1, %xmm0
6894 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
6895 ; SSSE3-NEXT: paddb %xmm4, %xmm3
6896 ; SSSE3-NEXT: pxor %xmm0, %xmm0
6897 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
6898 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
6899 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
6900 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
6901 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
6902 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
6903 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5]
6904 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
6907 ; SSE41-LABEL: ult_5_v4i32:
6909 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6910 ; SSE41-NEXT: movdqa %xmm0, %xmm2
6911 ; SSE41-NEXT: pand %xmm1, %xmm2
6912 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6913 ; SSE41-NEXT: movdqa %xmm3, %xmm4
6914 ; SSE41-NEXT: pshufb %xmm2, %xmm4
6915 ; SSE41-NEXT: psrlw $4, %xmm0
6916 ; SSE41-NEXT: pand %xmm1, %xmm0
6917 ; SSE41-NEXT: pshufb %xmm0, %xmm3
6918 ; SSE41-NEXT: paddb %xmm4, %xmm3
6919 ; SSE41-NEXT: pxor %xmm0, %xmm0
6920 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
6921 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
6922 ; SSE41-NEXT: psadbw %xmm0, %xmm3
6923 ; SSE41-NEXT: psadbw %xmm0, %xmm1
6924 ; SSE41-NEXT: packuswb %xmm3, %xmm1
6925 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [5,5,5,5]
6926 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
6929 ; AVX1-LABEL: ult_5_v4i32:
6931 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6932 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
6933 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6934 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6935 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
6936 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
6937 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6938 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6939 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
6940 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6941 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6942 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6943 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6944 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6945 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [5,5,5,5]
6946 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6949 ; AVX2-LABEL: ult_5_v4i32:
6951 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
6952 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
6953 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
6954 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
6955 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
6956 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
6957 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
6958 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
6959 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
6960 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6961 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6962 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6963 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6964 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6965 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
6966 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6969 ; AVX512VPOPCNTDQ-LABEL: ult_5_v4i32:
6970 ; AVX512VPOPCNTDQ: # %bb.0:
6971 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6972 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
6973 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
6974 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6975 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
6976 ; AVX512VPOPCNTDQ-NEXT: retq
6978 ; AVX512VPOPCNTDQVL-LABEL: ult_5_v4i32:
6979 ; AVX512VPOPCNTDQVL: # %bb.0:
6980 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
6981 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
6982 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6983 ; AVX512VPOPCNTDQVL-NEXT: retq
6985 ; BITALG_NOVLX-LABEL: ult_5_v4i32:
6986 ; BITALG_NOVLX: # %bb.0:
6987 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
6988 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
6989 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
6990 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
6991 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
6992 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
6993 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6994 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
6995 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
6996 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
6997 ; BITALG_NOVLX-NEXT: vzeroupper
6998 ; BITALG_NOVLX-NEXT: retq
7000 ; BITALG-LABEL: ult_5_v4i32:
7002 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
7003 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
7004 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7005 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7006 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7007 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7008 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7009 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
7010 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7012 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
7013 %3 = icmp ult <4 x i32> %2, <i32 5, i32 5, i32 5, i32 5>
7014 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_5_v4i32 - codegen test for "popcount(x) > 5" (unsigned) on each lane of a
; <4 x i32> vector, with the i1 compare result sign-extended to an i32 lane.
; The per-target assembly expectations below are autogenerated (see the NOTE
; on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; In the expectations, the AVX512VPOPCNTDQ runs use vpopcntd, the BITALG runs
; use vpopcntb followed by a vpsadbw-based widening, and the remaining runs
; expand the popcount with shift/mask arithmetic or a pshufb nibble table.
7018 define <4 x i32> @ugt_5_v4i32(<4 x i32> %0) {
7019 ; SSE2-LABEL: ugt_5_v4i32:
7021 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7022 ; SSE2-NEXT: psrlw $1, %xmm1
7023 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7024 ; SSE2-NEXT: psubb %xmm1, %xmm0
7025 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7026 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7027 ; SSE2-NEXT: pand %xmm1, %xmm2
7028 ; SSE2-NEXT: psrlw $2, %xmm0
7029 ; SSE2-NEXT: pand %xmm1, %xmm0
7030 ; SSE2-NEXT: paddb %xmm2, %xmm0
7031 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7032 ; SSE2-NEXT: psrlw $4, %xmm1
7033 ; SSE2-NEXT: paddb %xmm1, %xmm0
7034 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7035 ; SSE2-NEXT: pxor %xmm1, %xmm1
7036 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7037 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
7038 ; SSE2-NEXT: psadbw %xmm1, %xmm2
7039 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7040 ; SSE2-NEXT: psadbw %xmm1, %xmm0
7041 ; SSE2-NEXT: packuswb %xmm2, %xmm0
7042 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7045 ; SSE3-LABEL: ugt_5_v4i32:
7047 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7048 ; SSE3-NEXT: psrlw $1, %xmm1
7049 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7050 ; SSE3-NEXT: psubb %xmm1, %xmm0
7051 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7052 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7053 ; SSE3-NEXT: pand %xmm1, %xmm2
7054 ; SSE3-NEXT: psrlw $2, %xmm0
7055 ; SSE3-NEXT: pand %xmm1, %xmm0
7056 ; SSE3-NEXT: paddb %xmm2, %xmm0
7057 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7058 ; SSE3-NEXT: psrlw $4, %xmm1
7059 ; SSE3-NEXT: paddb %xmm1, %xmm0
7060 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7061 ; SSE3-NEXT: pxor %xmm1, %xmm1
7062 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7063 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
7064 ; SSE3-NEXT: psadbw %xmm1, %xmm2
7065 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7066 ; SSE3-NEXT: psadbw %xmm1, %xmm0
7067 ; SSE3-NEXT: packuswb %xmm2, %xmm0
7068 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7071 ; SSSE3-LABEL: ugt_5_v4i32:
7073 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7074 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
7075 ; SSSE3-NEXT: pand %xmm2, %xmm3
7076 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7077 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
7078 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
7079 ; SSSE3-NEXT: psrlw $4, %xmm0
7080 ; SSSE3-NEXT: pand %xmm2, %xmm0
7081 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
7082 ; SSSE3-NEXT: paddb %xmm4, %xmm1
7083 ; SSSE3-NEXT: pxor %xmm0, %xmm0
7084 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
7085 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7086 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
7087 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
7088 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
7089 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
7090 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7091 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
7094 ; SSE41-LABEL: ugt_5_v4i32:
7096 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7097 ; SSE41-NEXT: movdqa %xmm0, %xmm2
7098 ; SSE41-NEXT: pand %xmm1, %xmm2
7099 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7100 ; SSE41-NEXT: movdqa %xmm3, %xmm4
7101 ; SSE41-NEXT: pshufb %xmm2, %xmm4
7102 ; SSE41-NEXT: psrlw $4, %xmm0
7103 ; SSE41-NEXT: pand %xmm1, %xmm0
7104 ; SSE41-NEXT: pshufb %xmm0, %xmm3
7105 ; SSE41-NEXT: paddb %xmm4, %xmm3
7106 ; SSE41-NEXT: pxor %xmm1, %xmm1
7107 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
7108 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
7109 ; SSE41-NEXT: psadbw %xmm1, %xmm3
7110 ; SSE41-NEXT: psadbw %xmm1, %xmm0
7111 ; SSE41-NEXT: packuswb %xmm3, %xmm0
7112 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7115 ; AVX1-LABEL: ugt_5_v4i32:
7117 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7118 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
7119 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7120 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7121 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
7122 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
7123 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7124 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7125 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
7126 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7127 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7128 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7129 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7130 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7131 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
7134 ; AVX2-LABEL: ugt_5_v4i32:
7136 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7137 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
7138 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7139 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7140 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
7141 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
7142 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7143 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7144 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
7145 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7146 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7147 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7148 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7149 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7150 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
7151 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7154 ; AVX512VPOPCNTDQ-LABEL: ugt_5_v4i32:
7155 ; AVX512VPOPCNTDQ: # %bb.0:
7156 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7157 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
7158 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
7159 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7160 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
7161 ; AVX512VPOPCNTDQ-NEXT: retq
7163 ; AVX512VPOPCNTDQVL-LABEL: ugt_5_v4i32:
7164 ; AVX512VPOPCNTDQVL: # %bb.0:
7165 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
7166 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
7167 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7168 ; AVX512VPOPCNTDQVL-NEXT: retq
7170 ; BITALG_NOVLX-LABEL: ugt_5_v4i32:
7171 ; BITALG_NOVLX: # %bb.0:
7172 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7173 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
7174 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
7175 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7176 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7177 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7178 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7179 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7180 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
7181 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7182 ; BITALG_NOVLX-NEXT: vzeroupper
7183 ; BITALG_NOVLX-NEXT: retq
7185 ; BITALG-LABEL: ugt_5_v4i32:
7187 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
7188 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
7189 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7190 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7191 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7192 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7193 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7194 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [5,5,5,5]
7195 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; Count the set bits of each 32-bit lane of the argument.
7197 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
; Unsigned compare of every lane's bit count against the splat constant 5.
7198 %3 = icmp ugt <4 x i32> %2, <i32 5, i32 5, i32 5, i32 5>
; Sign-extend the i1 results so a true lane becomes all-ones and a false lane zero.
7199 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_6_v4i32 - codegen test for "popcount(x) < 6" (unsigned) on each lane of a
; <4 x i32> vector, with the i1 compare result sign-extended to an i32 lane.
; The per-target assembly expectations below are autogenerated (see the NOTE
; on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; In every expectation the less-than is emitted as a (v)pcmpgtd whose
; greater-than side is the materialized constant [6,6,6,6].
; NOTE(review): using the signed compare for an unsigned predicate is
; presumably valid because a 32-bit popcount never exceeds 32 - confirm.
7203 define <4 x i32> @ult_6_v4i32(<4 x i32> %0) {
7204 ; SSE2-LABEL: ult_6_v4i32:
7206 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7207 ; SSE2-NEXT: psrlw $1, %xmm1
7208 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7209 ; SSE2-NEXT: psubb %xmm1, %xmm0
7210 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7211 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7212 ; SSE2-NEXT: pand %xmm1, %xmm2
7213 ; SSE2-NEXT: psrlw $2, %xmm0
7214 ; SSE2-NEXT: pand %xmm1, %xmm0
7215 ; SSE2-NEXT: paddb %xmm2, %xmm0
7216 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7217 ; SSE2-NEXT: psrlw $4, %xmm1
7218 ; SSE2-NEXT: paddb %xmm0, %xmm1
7219 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7220 ; SSE2-NEXT: pxor %xmm0, %xmm0
7221 ; SSE2-NEXT: movdqa %xmm1, %xmm2
7222 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7223 ; SSE2-NEXT: psadbw %xmm0, %xmm2
7224 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
7225 ; SSE2-NEXT: psadbw %xmm0, %xmm1
7226 ; SSE2-NEXT: packuswb %xmm2, %xmm1
7227 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
7228 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
7231 ; SSE3-LABEL: ult_6_v4i32:
7233 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7234 ; SSE3-NEXT: psrlw $1, %xmm1
7235 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7236 ; SSE3-NEXT: psubb %xmm1, %xmm0
7237 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7238 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7239 ; SSE3-NEXT: pand %xmm1, %xmm2
7240 ; SSE3-NEXT: psrlw $2, %xmm0
7241 ; SSE3-NEXT: pand %xmm1, %xmm0
7242 ; SSE3-NEXT: paddb %xmm2, %xmm0
7243 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7244 ; SSE3-NEXT: psrlw $4, %xmm1
7245 ; SSE3-NEXT: paddb %xmm0, %xmm1
7246 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7247 ; SSE3-NEXT: pxor %xmm0, %xmm0
7248 ; SSE3-NEXT: movdqa %xmm1, %xmm2
7249 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7250 ; SSE3-NEXT: psadbw %xmm0, %xmm2
7251 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
7252 ; SSE3-NEXT: psadbw %xmm0, %xmm1
7253 ; SSE3-NEXT: packuswb %xmm2, %xmm1
7254 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
7255 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
7258 ; SSSE3-LABEL: ult_6_v4i32:
7260 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7261 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
7262 ; SSSE3-NEXT: pand %xmm1, %xmm2
7263 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7264 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
7265 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
7266 ; SSSE3-NEXT: psrlw $4, %xmm0
7267 ; SSSE3-NEXT: pand %xmm1, %xmm0
7268 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
7269 ; SSSE3-NEXT: paddb %xmm4, %xmm3
7270 ; SSSE3-NEXT: pxor %xmm0, %xmm0
7271 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
7272 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
7273 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
7274 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
7275 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
7276 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
7277 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
7278 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
7281 ; SSE41-LABEL: ult_6_v4i32:
7283 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7284 ; SSE41-NEXT: movdqa %xmm0, %xmm2
7285 ; SSE41-NEXT: pand %xmm1, %xmm2
7286 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7287 ; SSE41-NEXT: movdqa %xmm3, %xmm4
7288 ; SSE41-NEXT: pshufb %xmm2, %xmm4
7289 ; SSE41-NEXT: psrlw $4, %xmm0
7290 ; SSE41-NEXT: pand %xmm1, %xmm0
7291 ; SSE41-NEXT: pshufb %xmm0, %xmm3
7292 ; SSE41-NEXT: paddb %xmm4, %xmm3
7293 ; SSE41-NEXT: pxor %xmm0, %xmm0
7294 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
7295 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
7296 ; SSE41-NEXT: psadbw %xmm0, %xmm3
7297 ; SSE41-NEXT: psadbw %xmm0, %xmm1
7298 ; SSE41-NEXT: packuswb %xmm3, %xmm1
7299 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [6,6,6,6]
7300 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
7303 ; AVX1-LABEL: ult_6_v4i32:
7305 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7306 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
7307 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7308 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7309 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
7310 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
7311 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7312 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7313 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
7314 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7315 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7316 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7317 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7318 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7319 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [6,6,6,6]
7320 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7323 ; AVX2-LABEL: ult_6_v4i32:
7325 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7326 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
7327 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7328 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7329 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
7330 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
7331 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7332 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7333 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
7334 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7335 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7336 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7337 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7338 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7339 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7340 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7343 ; AVX512VPOPCNTDQ-LABEL: ult_6_v4i32:
7344 ; AVX512VPOPCNTDQ: # %bb.0:
7345 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7346 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
7347 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7348 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7349 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
7350 ; AVX512VPOPCNTDQ-NEXT: retq
7352 ; AVX512VPOPCNTDQVL-LABEL: ult_6_v4i32:
7353 ; AVX512VPOPCNTDQVL: # %bb.0:
7354 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
7355 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7356 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7357 ; AVX512VPOPCNTDQVL-NEXT: retq
7359 ; BITALG_NOVLX-LABEL: ult_6_v4i32:
7360 ; BITALG_NOVLX: # %bb.0:
7361 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7362 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
7363 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
7364 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7365 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7366 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7367 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7368 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7369 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7370 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7371 ; BITALG_NOVLX-NEXT: vzeroupper
7372 ; BITALG_NOVLX-NEXT: retq
7374 ; BITALG-LABEL: ult_6_v4i32:
7376 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
7377 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
7378 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7379 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7380 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7381 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7382 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7383 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7384 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; Count the set bits of each 32-bit lane of the argument.
7386 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
; Unsigned compare of every lane's bit count against the splat constant 6.
7387 %3 = icmp ult <4 x i32> %2, <i32 6, i32 6, i32 6, i32 6>
; Sign-extend the i1 results so a true lane becomes all-ones and a false lane zero.
7388 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_6_v4i32 - codegen test for "popcount(x) > 6" (unsigned) on each lane of a
; <4 x i32> vector, with the i1 compare result sign-extended to an i32 lane.
; The per-target assembly expectations below are autogenerated (see the NOTE
; on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; In the expectations, the SSE-level and AVX1 runs compare against a
; constant-pool operand directly, while the AVX2 and AVX-512 runs first
; broadcast [6,6,6,6] into a register and compare against that.
7392 define <4 x i32> @ugt_6_v4i32(<4 x i32> %0) {
7393 ; SSE2-LABEL: ugt_6_v4i32:
7395 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7396 ; SSE2-NEXT: psrlw $1, %xmm1
7397 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7398 ; SSE2-NEXT: psubb %xmm1, %xmm0
7399 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7400 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7401 ; SSE2-NEXT: pand %xmm1, %xmm2
7402 ; SSE2-NEXT: psrlw $2, %xmm0
7403 ; SSE2-NEXT: pand %xmm1, %xmm0
7404 ; SSE2-NEXT: paddb %xmm2, %xmm0
7405 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7406 ; SSE2-NEXT: psrlw $4, %xmm1
7407 ; SSE2-NEXT: paddb %xmm1, %xmm0
7408 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7409 ; SSE2-NEXT: pxor %xmm1, %xmm1
7410 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7411 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
7412 ; SSE2-NEXT: psadbw %xmm1, %xmm2
7413 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7414 ; SSE2-NEXT: psadbw %xmm1, %xmm0
7415 ; SSE2-NEXT: packuswb %xmm2, %xmm0
7416 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7419 ; SSE3-LABEL: ugt_6_v4i32:
7421 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7422 ; SSE3-NEXT: psrlw $1, %xmm1
7423 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7424 ; SSE3-NEXT: psubb %xmm1, %xmm0
7425 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7426 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7427 ; SSE3-NEXT: pand %xmm1, %xmm2
7428 ; SSE3-NEXT: psrlw $2, %xmm0
7429 ; SSE3-NEXT: pand %xmm1, %xmm0
7430 ; SSE3-NEXT: paddb %xmm2, %xmm0
7431 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7432 ; SSE3-NEXT: psrlw $4, %xmm1
7433 ; SSE3-NEXT: paddb %xmm1, %xmm0
7434 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7435 ; SSE3-NEXT: pxor %xmm1, %xmm1
7436 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7437 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
7438 ; SSE3-NEXT: psadbw %xmm1, %xmm2
7439 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7440 ; SSE3-NEXT: psadbw %xmm1, %xmm0
7441 ; SSE3-NEXT: packuswb %xmm2, %xmm0
7442 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7445 ; SSSE3-LABEL: ugt_6_v4i32:
7447 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7448 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
7449 ; SSSE3-NEXT: pand %xmm2, %xmm3
7450 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7451 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
7452 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
7453 ; SSSE3-NEXT: psrlw $4, %xmm0
7454 ; SSSE3-NEXT: pand %xmm2, %xmm0
7455 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
7456 ; SSSE3-NEXT: paddb %xmm4, %xmm1
7457 ; SSSE3-NEXT: pxor %xmm0, %xmm0
7458 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
7459 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7460 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
7461 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
7462 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
7463 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
7464 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7465 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
7468 ; SSE41-LABEL: ugt_6_v4i32:
7470 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7471 ; SSE41-NEXT: movdqa %xmm0, %xmm2
7472 ; SSE41-NEXT: pand %xmm1, %xmm2
7473 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7474 ; SSE41-NEXT: movdqa %xmm3, %xmm4
7475 ; SSE41-NEXT: pshufb %xmm2, %xmm4
7476 ; SSE41-NEXT: psrlw $4, %xmm0
7477 ; SSE41-NEXT: pand %xmm1, %xmm0
7478 ; SSE41-NEXT: pshufb %xmm0, %xmm3
7479 ; SSE41-NEXT: paddb %xmm4, %xmm3
7480 ; SSE41-NEXT: pxor %xmm1, %xmm1
7481 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
7482 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
7483 ; SSE41-NEXT: psadbw %xmm1, %xmm3
7484 ; SSE41-NEXT: psadbw %xmm1, %xmm0
7485 ; SSE41-NEXT: packuswb %xmm3, %xmm0
7486 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7489 ; AVX1-LABEL: ugt_6_v4i32:
7491 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7492 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
7493 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7494 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7495 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
7496 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
7497 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7498 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7499 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
7500 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7501 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7502 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7503 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7504 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7505 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
7508 ; AVX2-LABEL: ugt_6_v4i32:
7510 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7511 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
7512 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7513 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7514 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
7515 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
7516 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7517 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7518 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
7519 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7520 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7521 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7522 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7523 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7524 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7525 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7528 ; AVX512VPOPCNTDQ-LABEL: ugt_6_v4i32:
7529 ; AVX512VPOPCNTDQ: # %bb.0:
7530 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7531 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
7532 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7533 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7534 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
7535 ; AVX512VPOPCNTDQ-NEXT: retq
7537 ; AVX512VPOPCNTDQVL-LABEL: ugt_6_v4i32:
7538 ; AVX512VPOPCNTDQVL: # %bb.0:
7539 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
7540 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7541 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7542 ; AVX512VPOPCNTDQVL-NEXT: retq
7544 ; BITALG_NOVLX-LABEL: ugt_6_v4i32:
7545 ; BITALG_NOVLX: # %bb.0:
7546 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7547 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
7548 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
7549 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7550 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7551 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7552 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7553 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7554 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7555 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7556 ; BITALG_NOVLX-NEXT: vzeroupper
7557 ; BITALG_NOVLX-NEXT: retq
7559 ; BITALG-LABEL: ugt_6_v4i32:
7561 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
7562 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
7563 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7564 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7565 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7566 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7567 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7568 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [6,6,6,6]
7569 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; Count the set bits of each 32-bit lane of the argument.
7571 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
; Unsigned compare of every lane's bit count against the splat constant 6.
7572 %3 = icmp ugt <4 x i32> %2, <i32 6, i32 6, i32 6, i32 6>
; Sign-extend the i1 results so a true lane becomes all-ones and a false lane zero.
7573 %4 = sext <4 x i1> %3 to <4 x i32>
7577 define <4 x i32> @ult_7_v4i32(<4 x i32> %0) {
7578 ; SSE2-LABEL: ult_7_v4i32:
7580 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7581 ; SSE2-NEXT: psrlw $1, %xmm1
7582 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7583 ; SSE2-NEXT: psubb %xmm1, %xmm0
7584 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7585 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7586 ; SSE2-NEXT: pand %xmm1, %xmm2
7587 ; SSE2-NEXT: psrlw $2, %xmm0
7588 ; SSE2-NEXT: pand %xmm1, %xmm0
7589 ; SSE2-NEXT: paddb %xmm2, %xmm0
7590 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7591 ; SSE2-NEXT: psrlw $4, %xmm1
7592 ; SSE2-NEXT: paddb %xmm0, %xmm1
7593 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7594 ; SSE2-NEXT: pxor %xmm0, %xmm0
7595 ; SSE2-NEXT: movdqa %xmm1, %xmm2
7596 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7597 ; SSE2-NEXT: psadbw %xmm0, %xmm2
7598 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
7599 ; SSE2-NEXT: psadbw %xmm0, %xmm1
7600 ; SSE2-NEXT: packuswb %xmm2, %xmm1
7601 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7]
7602 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
7605 ; SSE3-LABEL: ult_7_v4i32:
7607 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7608 ; SSE3-NEXT: psrlw $1, %xmm1
7609 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7610 ; SSE3-NEXT: psubb %xmm1, %xmm0
7611 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7612 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7613 ; SSE3-NEXT: pand %xmm1, %xmm2
7614 ; SSE3-NEXT: psrlw $2, %xmm0
7615 ; SSE3-NEXT: pand %xmm1, %xmm0
7616 ; SSE3-NEXT: paddb %xmm2, %xmm0
7617 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7618 ; SSE3-NEXT: psrlw $4, %xmm1
7619 ; SSE3-NEXT: paddb %xmm0, %xmm1
7620 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7621 ; SSE3-NEXT: pxor %xmm0, %xmm0
7622 ; SSE3-NEXT: movdqa %xmm1, %xmm2
7623 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7624 ; SSE3-NEXT: psadbw %xmm0, %xmm2
7625 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
7626 ; SSE3-NEXT: psadbw %xmm0, %xmm1
7627 ; SSE3-NEXT: packuswb %xmm2, %xmm1
7628 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7]
7629 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
7632 ; SSSE3-LABEL: ult_7_v4i32:
7634 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7635 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
7636 ; SSSE3-NEXT: pand %xmm1, %xmm2
7637 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7638 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
7639 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
7640 ; SSSE3-NEXT: psrlw $4, %xmm0
7641 ; SSSE3-NEXT: pand %xmm1, %xmm0
7642 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
7643 ; SSSE3-NEXT: paddb %xmm4, %xmm3
7644 ; SSSE3-NEXT: pxor %xmm0, %xmm0
7645 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
7646 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
7647 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
7648 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
7649 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
7650 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
7651 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7]
7652 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
7655 ; SSE41-LABEL: ult_7_v4i32:
7657 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7658 ; SSE41-NEXT: movdqa %xmm0, %xmm2
7659 ; SSE41-NEXT: pand %xmm1, %xmm2
7660 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7661 ; SSE41-NEXT: movdqa %xmm3, %xmm4
7662 ; SSE41-NEXT: pshufb %xmm2, %xmm4
7663 ; SSE41-NEXT: psrlw $4, %xmm0
7664 ; SSE41-NEXT: pand %xmm1, %xmm0
7665 ; SSE41-NEXT: pshufb %xmm0, %xmm3
7666 ; SSE41-NEXT: paddb %xmm4, %xmm3
7667 ; SSE41-NEXT: pxor %xmm0, %xmm0
7668 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
7669 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
7670 ; SSE41-NEXT: psadbw %xmm0, %xmm3
7671 ; SSE41-NEXT: psadbw %xmm0, %xmm1
7672 ; SSE41-NEXT: packuswb %xmm3, %xmm1
7673 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [7,7,7,7]
7674 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
7677 ; AVX1-LABEL: ult_7_v4i32:
7679 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7680 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
7681 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7682 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7683 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
7684 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
7685 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7686 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7687 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
7688 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7689 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7690 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7691 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7692 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7693 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
7694 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7697 ; AVX2-LABEL: ult_7_v4i32:
7699 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7700 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
7701 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7702 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7703 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
7704 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
7705 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7706 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7707 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
7708 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7709 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7710 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7711 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7712 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7713 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
7714 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7717 ; AVX512VPOPCNTDQ-LABEL: ult_7_v4i32:
7718 ; AVX512VPOPCNTDQ: # %bb.0:
7719 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7720 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
7721 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
7722 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7723 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
7724 ; AVX512VPOPCNTDQ-NEXT: retq
7726 ; AVX512VPOPCNTDQVL-LABEL: ult_7_v4i32:
7727 ; AVX512VPOPCNTDQVL: # %bb.0:
7728 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
7729 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
7730 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7731 ; AVX512VPOPCNTDQVL-NEXT: retq
7733 ; BITALG_NOVLX-LABEL: ult_7_v4i32:
7734 ; BITALG_NOVLX: # %bb.0:
7735 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7736 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
7737 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
7738 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7739 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7740 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7741 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7742 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7743 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
7744 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7745 ; BITALG_NOVLX-NEXT: vzeroupper
7746 ; BITALG_NOVLX-NEXT: retq
7748 ; BITALG-LABEL: ult_7_v4i32:
7750 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
7751 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
7752 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7753 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7754 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7755 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7756 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7757 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
7758 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
7760 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
7761 %3 = icmp ult <4 x i32> %2, <i32 7, i32 7, i32 7, i32 7>
7762 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_7_v4i32 builds a per-lane mask for "popcount of each i32 lane of %0 is
; unsigned-greater-than 7": %2 is the lane-wise llvm.ctpop.v4i32 result, %3 the
; icmp ugt against a splat of 7, and %4 sign-extends each i1 to a full i32 lane
; (all-ones where the compare holds, zero otherwise).
; Expected lowering differs by feature set. The SSE2 and SSE3 runs use a
; shift-and-mask byte popcount, SSSE3 through AVX2 use a pshufb nibble lookup
; table, the VPOPCNTDQ runs use vpopcntd directly, and the BITALG runs use
; vpopcntb. The byte-count variants widen each lane with zeros, sum the bytes
; with psadbw and repack with packuswb before the final pcmpgtd against 7.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py (see
; the note at the top of the file); regenerate them instead of editing by hand.
7766 define <4 x i32> @ugt_7_v4i32(<4 x i32> %0) {
7767 ; SSE2-LABEL: ugt_7_v4i32:
7769 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7770 ; SSE2-NEXT: psrlw $1, %xmm1
7771 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7772 ; SSE2-NEXT: psubb %xmm1, %xmm0
7773 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7774 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7775 ; SSE2-NEXT: pand %xmm1, %xmm2
7776 ; SSE2-NEXT: psrlw $2, %xmm0
7777 ; SSE2-NEXT: pand %xmm1, %xmm0
7778 ; SSE2-NEXT: paddb %xmm2, %xmm0
7779 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7780 ; SSE2-NEXT: psrlw $4, %xmm1
7781 ; SSE2-NEXT: paddb %xmm1, %xmm0
7782 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7783 ; SSE2-NEXT: pxor %xmm1, %xmm1
7784 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7785 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
7786 ; SSE2-NEXT: psadbw %xmm1, %xmm2
7787 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7788 ; SSE2-NEXT: psadbw %xmm1, %xmm0
7789 ; SSE2-NEXT: packuswb %xmm2, %xmm0
7790 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7793 ; SSE3-LABEL: ugt_7_v4i32:
7795 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7796 ; SSE3-NEXT: psrlw $1, %xmm1
7797 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7798 ; SSE3-NEXT: psubb %xmm1, %xmm0
7799 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7800 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7801 ; SSE3-NEXT: pand %xmm1, %xmm2
7802 ; SSE3-NEXT: psrlw $2, %xmm0
7803 ; SSE3-NEXT: pand %xmm1, %xmm0
7804 ; SSE3-NEXT: paddb %xmm2, %xmm0
7805 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7806 ; SSE3-NEXT: psrlw $4, %xmm1
7807 ; SSE3-NEXT: paddb %xmm1, %xmm0
7808 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7809 ; SSE3-NEXT: pxor %xmm1, %xmm1
7810 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7811 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
7812 ; SSE3-NEXT: psadbw %xmm1, %xmm2
7813 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
7814 ; SSE3-NEXT: psadbw %xmm1, %xmm0
7815 ; SSE3-NEXT: packuswb %xmm2, %xmm0
7816 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7819 ; SSSE3-LABEL: ugt_7_v4i32:
7821 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7822 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
7823 ; SSSE3-NEXT: pand %xmm2, %xmm3
7824 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7825 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
7826 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
7827 ; SSSE3-NEXT: psrlw $4, %xmm0
7828 ; SSSE3-NEXT: pand %xmm2, %xmm0
7829 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
7830 ; SSSE3-NEXT: paddb %xmm4, %xmm1
7831 ; SSSE3-NEXT: pxor %xmm0, %xmm0
7832 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
7833 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7834 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
7835 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
7836 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
7837 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
7838 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7839 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
7842 ; SSE41-LABEL: ugt_7_v4i32:
7844 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7845 ; SSE41-NEXT: movdqa %xmm0, %xmm2
7846 ; SSE41-NEXT: pand %xmm1, %xmm2
7847 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7848 ; SSE41-NEXT: movdqa %xmm3, %xmm4
7849 ; SSE41-NEXT: pshufb %xmm2, %xmm4
7850 ; SSE41-NEXT: psrlw $4, %xmm0
7851 ; SSE41-NEXT: pand %xmm1, %xmm0
7852 ; SSE41-NEXT: pshufb %xmm0, %xmm3
7853 ; SSE41-NEXT: paddb %xmm4, %xmm3
7854 ; SSE41-NEXT: pxor %xmm1, %xmm1
7855 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
7856 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
7857 ; SSE41-NEXT: psadbw %xmm1, %xmm3
7858 ; SSE41-NEXT: psadbw %xmm1, %xmm0
7859 ; SSE41-NEXT: packuswb %xmm3, %xmm0
7860 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7863 ; AVX1-LABEL: ugt_7_v4i32:
7865 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7866 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
7867 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7868 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7869 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
7870 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
7871 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7872 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7873 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
7874 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7875 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7876 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7877 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7878 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7879 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
7882 ; AVX2-LABEL: ugt_7_v4i32:
7884 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7885 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
7886 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
7887 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
7888 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
7889 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
7890 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
7891 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
7892 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
7893 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7894 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7895 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7896 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7897 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7898 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
7899 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7902 ; AVX512VPOPCNTDQ-LABEL: ugt_7_v4i32:
7903 ; AVX512VPOPCNTDQ: # %bb.0:
7904 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7905 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
7906 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
7907 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7908 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
7909 ; AVX512VPOPCNTDQ-NEXT: retq
7911 ; AVX512VPOPCNTDQVL-LABEL: ugt_7_v4i32:
7912 ; AVX512VPOPCNTDQVL: # %bb.0:
7913 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
7914 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
7915 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7916 ; AVX512VPOPCNTDQVL-NEXT: retq
7918 ; BITALG_NOVLX-LABEL: ugt_7_v4i32:
7919 ; BITALG_NOVLX: # %bb.0:
7920 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
7921 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
7922 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
7923 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7924 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7925 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7926 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7927 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7928 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
7929 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7930 ; BITALG_NOVLX-NEXT: vzeroupper
7931 ; BITALG_NOVLX-NEXT: retq
7933 ; BITALG-LABEL: ugt_7_v4i32:
7935 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
7936 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
7937 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
7938 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
7939 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
7940 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
7941 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
7942 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
7943 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
7945 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
7946 %3 = icmp ugt <4 x i32> %2, <i32 7, i32 7, i32 7, i32 7>
7947 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_8_v4i32 builds a per-lane mask for "popcount of each i32 lane of %0 is
; unsigned-less-than 8": %2 is the lane-wise llvm.ctpop.v4i32 result, %3 the
; icmp ult against a splat of 8, and %4 sign-extends each i1 to a full i32 lane
; (all-ones where the compare holds, zero otherwise).
; Expected lowering differs by feature set. The SSE2 and SSE3 runs use a
; shift-and-mask byte popcount, SSSE3 through AVX2 use a pshufb nibble lookup
; table, the VPOPCNTDQ runs use vpopcntd directly, and the BITALG runs use
; vpopcntb. The byte-count variants widen each lane with zeros, sum the bytes
; with psadbw and repack with packuswb. In every variant the final pcmpgtd
; takes the materialised [8,8,8,8] splat as the greater-than side and the
; popcount as the other operand.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py (see
; the note at the top of the file); regenerate them instead of editing by hand.
7951 define <4 x i32> @ult_8_v4i32(<4 x i32> %0) {
7952 ; SSE2-LABEL: ult_8_v4i32:
7954 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7955 ; SSE2-NEXT: psrlw $1, %xmm1
7956 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7957 ; SSE2-NEXT: psubb %xmm1, %xmm0
7958 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7959 ; SSE2-NEXT: movdqa %xmm0, %xmm2
7960 ; SSE2-NEXT: pand %xmm1, %xmm2
7961 ; SSE2-NEXT: psrlw $2, %xmm0
7962 ; SSE2-NEXT: pand %xmm1, %xmm0
7963 ; SSE2-NEXT: paddb %xmm2, %xmm0
7964 ; SSE2-NEXT: movdqa %xmm0, %xmm1
7965 ; SSE2-NEXT: psrlw $4, %xmm1
7966 ; SSE2-NEXT: paddb %xmm0, %xmm1
7967 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7968 ; SSE2-NEXT: pxor %xmm0, %xmm0
7969 ; SSE2-NEXT: movdqa %xmm1, %xmm2
7970 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7971 ; SSE2-NEXT: psadbw %xmm0, %xmm2
7972 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
7973 ; SSE2-NEXT: psadbw %xmm0, %xmm1
7974 ; SSE2-NEXT: packuswb %xmm2, %xmm1
7975 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
7976 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
7979 ; SSE3-LABEL: ult_8_v4i32:
7981 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7982 ; SSE3-NEXT: psrlw $1, %xmm1
7983 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7984 ; SSE3-NEXT: psubb %xmm1, %xmm0
7985 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
7986 ; SSE3-NEXT: movdqa %xmm0, %xmm2
7987 ; SSE3-NEXT: pand %xmm1, %xmm2
7988 ; SSE3-NEXT: psrlw $2, %xmm0
7989 ; SSE3-NEXT: pand %xmm1, %xmm0
7990 ; SSE3-NEXT: paddb %xmm2, %xmm0
7991 ; SSE3-NEXT: movdqa %xmm0, %xmm1
7992 ; SSE3-NEXT: psrlw $4, %xmm1
7993 ; SSE3-NEXT: paddb %xmm0, %xmm1
7994 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
7995 ; SSE3-NEXT: pxor %xmm0, %xmm0
7996 ; SSE3-NEXT: movdqa %xmm1, %xmm2
7997 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
7998 ; SSE3-NEXT: psadbw %xmm0, %xmm2
7999 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8000 ; SSE3-NEXT: psadbw %xmm0, %xmm1
8001 ; SSE3-NEXT: packuswb %xmm2, %xmm1
8002 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
8003 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
8006 ; SSSE3-LABEL: ult_8_v4i32:
8008 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8009 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
8010 ; SSSE3-NEXT: pand %xmm1, %xmm2
8011 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8012 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
8013 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
8014 ; SSSE3-NEXT: psrlw $4, %xmm0
8015 ; SSSE3-NEXT: pand %xmm1, %xmm0
8016 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
8017 ; SSSE3-NEXT: paddb %xmm4, %xmm3
8018 ; SSSE3-NEXT: pxor %xmm0, %xmm0
8019 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
8020 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
8021 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
8022 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
8023 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
8024 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
8025 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
8026 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
8029 ; SSE41-LABEL: ult_8_v4i32:
8031 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8032 ; SSE41-NEXT: movdqa %xmm0, %xmm2
8033 ; SSE41-NEXT: pand %xmm1, %xmm2
8034 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8035 ; SSE41-NEXT: movdqa %xmm3, %xmm4
8036 ; SSE41-NEXT: pshufb %xmm2, %xmm4
8037 ; SSE41-NEXT: psrlw $4, %xmm0
8038 ; SSE41-NEXT: pand %xmm1, %xmm0
8039 ; SSE41-NEXT: pshufb %xmm0, %xmm3
8040 ; SSE41-NEXT: paddb %xmm4, %xmm3
8041 ; SSE41-NEXT: pxor %xmm0, %xmm0
8042 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
8043 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
8044 ; SSE41-NEXT: psadbw %xmm0, %xmm3
8045 ; SSE41-NEXT: psadbw %xmm0, %xmm1
8046 ; SSE41-NEXT: packuswb %xmm3, %xmm1
8047 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [8,8,8,8]
8048 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
8051 ; AVX1-LABEL: ult_8_v4i32:
8053 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8054 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
8055 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8056 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8057 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
8058 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
8059 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8060 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8061 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
8062 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8063 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8064 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8065 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8066 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8067 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [8,8,8,8]
8068 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8071 ; AVX2-LABEL: ult_8_v4i32:
8073 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8074 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
8075 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8076 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8077 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
8078 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
8079 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8080 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8081 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
8082 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8083 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8084 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8085 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8086 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8087 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8088 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8091 ; AVX512VPOPCNTDQ-LABEL: ult_8_v4i32:
8092 ; AVX512VPOPCNTDQ: # %bb.0:
8093 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8094 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
8095 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8096 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8097 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
8098 ; AVX512VPOPCNTDQ-NEXT: retq
8100 ; AVX512VPOPCNTDQVL-LABEL: ult_8_v4i32:
8101 ; AVX512VPOPCNTDQVL: # %bb.0:
8102 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
8103 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8104 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8105 ; AVX512VPOPCNTDQVL-NEXT: retq
8107 ; BITALG_NOVLX-LABEL: ult_8_v4i32:
8108 ; BITALG_NOVLX: # %bb.0:
8109 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8110 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
8111 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
8112 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8113 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8114 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8115 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8116 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8117 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8118 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8119 ; BITALG_NOVLX-NEXT: vzeroupper
8120 ; BITALG_NOVLX-NEXT: retq
8122 ; BITALG-LABEL: ult_8_v4i32:
8124 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
8125 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
8126 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8127 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8128 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8129 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8130 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8131 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8132 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8134 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
8135 %3 = icmp ult <4 x i32> %2, <i32 8, i32 8, i32 8, i32 8>
8136 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_8_v4i32 builds a per-lane mask for "popcount of each i32 lane of %0 is
; unsigned-greater-than 8": %2 is the lane-wise llvm.ctpop.v4i32 result, %3 the
; icmp ugt against a splat of 8, and %4 sign-extends each i1 to a full i32 lane
; (all-ones where the compare holds, zero otherwise).
; Expected lowering differs by feature set. The SSE2 and SSE3 runs use a
; shift-and-mask byte popcount, SSSE3 through AVX2 use a pshufb nibble lookup
; table, the VPOPCNTDQ runs use vpopcntd directly, and the BITALG runs use
; vpopcntb. The byte-count variants widen each lane with zeros, sum the bytes
; with psadbw and repack with packuswb before the final pcmpgtd. The SSE and
; AVX1 runs take the compare constant from a constant-pool memory operand; the
; AVX2 and AVX-512 runs broadcast [8,8,8,8] into a register first.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py (see
; the note at the top of the file); regenerate them instead of editing by hand.
8140 define <4 x i32> @ugt_8_v4i32(<4 x i32> %0) {
8141 ; SSE2-LABEL: ugt_8_v4i32:
8143 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8144 ; SSE2-NEXT: psrlw $1, %xmm1
8145 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8146 ; SSE2-NEXT: psubb %xmm1, %xmm0
8147 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8148 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8149 ; SSE2-NEXT: pand %xmm1, %xmm2
8150 ; SSE2-NEXT: psrlw $2, %xmm0
8151 ; SSE2-NEXT: pand %xmm1, %xmm0
8152 ; SSE2-NEXT: paddb %xmm2, %xmm0
8153 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8154 ; SSE2-NEXT: psrlw $4, %xmm1
8155 ; SSE2-NEXT: paddb %xmm1, %xmm0
8156 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8157 ; SSE2-NEXT: pxor %xmm1, %xmm1
8158 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8159 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
8160 ; SSE2-NEXT: psadbw %xmm1, %xmm2
8161 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
8162 ; SSE2-NEXT: psadbw %xmm1, %xmm0
8163 ; SSE2-NEXT: packuswb %xmm2, %xmm0
8164 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8167 ; SSE3-LABEL: ugt_8_v4i32:
8169 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8170 ; SSE3-NEXT: psrlw $1, %xmm1
8171 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8172 ; SSE3-NEXT: psubb %xmm1, %xmm0
8173 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8174 ; SSE3-NEXT: movdqa %xmm0, %xmm2
8175 ; SSE3-NEXT: pand %xmm1, %xmm2
8176 ; SSE3-NEXT: psrlw $2, %xmm0
8177 ; SSE3-NEXT: pand %xmm1, %xmm0
8178 ; SSE3-NEXT: paddb %xmm2, %xmm0
8179 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8180 ; SSE3-NEXT: psrlw $4, %xmm1
8181 ; SSE3-NEXT: paddb %xmm1, %xmm0
8182 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8183 ; SSE3-NEXT: pxor %xmm1, %xmm1
8184 ; SSE3-NEXT: movdqa %xmm0, %xmm2
8185 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
8186 ; SSE3-NEXT: psadbw %xmm1, %xmm2
8187 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
8188 ; SSE3-NEXT: psadbw %xmm1, %xmm0
8189 ; SSE3-NEXT: packuswb %xmm2, %xmm0
8190 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8193 ; SSSE3-LABEL: ugt_8_v4i32:
8195 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8196 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
8197 ; SSSE3-NEXT: pand %xmm2, %xmm3
8198 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8199 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
8200 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
8201 ; SSSE3-NEXT: psrlw $4, %xmm0
8202 ; SSSE3-NEXT: pand %xmm2, %xmm0
8203 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
8204 ; SSSE3-NEXT: paddb %xmm4, %xmm1
8205 ; SSSE3-NEXT: pxor %xmm0, %xmm0
8206 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
8207 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8208 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
8209 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8210 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
8211 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
8212 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8213 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
8216 ; SSE41-LABEL: ugt_8_v4i32:
8218 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8219 ; SSE41-NEXT: movdqa %xmm0, %xmm2
8220 ; SSE41-NEXT: pand %xmm1, %xmm2
8221 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8222 ; SSE41-NEXT: movdqa %xmm3, %xmm4
8223 ; SSE41-NEXT: pshufb %xmm2, %xmm4
8224 ; SSE41-NEXT: psrlw $4, %xmm0
8225 ; SSE41-NEXT: pand %xmm1, %xmm0
8226 ; SSE41-NEXT: pshufb %xmm0, %xmm3
8227 ; SSE41-NEXT: paddb %xmm4, %xmm3
8228 ; SSE41-NEXT: pxor %xmm1, %xmm1
8229 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
8230 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
8231 ; SSE41-NEXT: psadbw %xmm1, %xmm3
8232 ; SSE41-NEXT: psadbw %xmm1, %xmm0
8233 ; SSE41-NEXT: packuswb %xmm3, %xmm0
8234 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8237 ; AVX1-LABEL: ugt_8_v4i32:
8239 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8240 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
8241 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8242 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8243 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
8244 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
8245 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8246 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8247 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
8248 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8249 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8250 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8251 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8252 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8253 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
8256 ; AVX2-LABEL: ugt_8_v4i32:
8258 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8259 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
8260 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8261 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8262 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
8263 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
8264 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8265 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8266 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
8267 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8268 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8269 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8270 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8271 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8272 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8273 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8276 ; AVX512VPOPCNTDQ-LABEL: ugt_8_v4i32:
8277 ; AVX512VPOPCNTDQ: # %bb.0:
8278 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8279 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
8280 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8281 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8282 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
8283 ; AVX512VPOPCNTDQ-NEXT: retq
8285 ; AVX512VPOPCNTDQVL-LABEL: ugt_8_v4i32:
8286 ; AVX512VPOPCNTDQVL: # %bb.0:
8287 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
8288 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8289 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8290 ; AVX512VPOPCNTDQVL-NEXT: retq
8292 ; BITALG_NOVLX-LABEL: ugt_8_v4i32:
8293 ; BITALG_NOVLX: # %bb.0:
8294 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8295 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
8296 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
8297 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8298 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8299 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8300 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8301 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8302 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8303 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8304 ; BITALG_NOVLX-NEXT: vzeroupper
8305 ; BITALG_NOVLX-NEXT: retq
8307 ; BITALG-LABEL: ugt_8_v4i32:
8309 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
8310 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
8311 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8312 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8313 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8314 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8315 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8316 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [8,8,8,8]
8317 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8319 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
8320 %3 = icmp ugt <4 x i32> %2, <i32 8, i32 8, i32 8, i32 8>
8321 %4 = sext <4 x i1> %3 to <4 x i32>
8325 define <4 x i32> @ult_9_v4i32(<4 x i32> %0) {
8326 ; SSE2-LABEL: ult_9_v4i32:
8328 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8329 ; SSE2-NEXT: psrlw $1, %xmm1
8330 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8331 ; SSE2-NEXT: psubb %xmm1, %xmm0
8332 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8333 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8334 ; SSE2-NEXT: pand %xmm1, %xmm2
8335 ; SSE2-NEXT: psrlw $2, %xmm0
8336 ; SSE2-NEXT: pand %xmm1, %xmm0
8337 ; SSE2-NEXT: paddb %xmm2, %xmm0
8338 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8339 ; SSE2-NEXT: psrlw $4, %xmm1
8340 ; SSE2-NEXT: paddb %xmm0, %xmm1
8341 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8342 ; SSE2-NEXT: pxor %xmm0, %xmm0
8343 ; SSE2-NEXT: movdqa %xmm1, %xmm2
8344 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8345 ; SSE2-NEXT: psadbw %xmm0, %xmm2
8346 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8347 ; SSE2-NEXT: psadbw %xmm0, %xmm1
8348 ; SSE2-NEXT: packuswb %xmm2, %xmm1
8349 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9]
8350 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
8353 ; SSE3-LABEL: ult_9_v4i32:
8355 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8356 ; SSE3-NEXT: psrlw $1, %xmm1
8357 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8358 ; SSE3-NEXT: psubb %xmm1, %xmm0
8359 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8360 ; SSE3-NEXT: movdqa %xmm0, %xmm2
8361 ; SSE3-NEXT: pand %xmm1, %xmm2
8362 ; SSE3-NEXT: psrlw $2, %xmm0
8363 ; SSE3-NEXT: pand %xmm1, %xmm0
8364 ; SSE3-NEXT: paddb %xmm2, %xmm0
8365 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8366 ; SSE3-NEXT: psrlw $4, %xmm1
8367 ; SSE3-NEXT: paddb %xmm0, %xmm1
8368 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8369 ; SSE3-NEXT: pxor %xmm0, %xmm0
8370 ; SSE3-NEXT: movdqa %xmm1, %xmm2
8371 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8372 ; SSE3-NEXT: psadbw %xmm0, %xmm2
8373 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8374 ; SSE3-NEXT: psadbw %xmm0, %xmm1
8375 ; SSE3-NEXT: packuswb %xmm2, %xmm1
8376 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9]
8377 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
8380 ; SSSE3-LABEL: ult_9_v4i32:
8382 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8383 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
8384 ; SSSE3-NEXT: pand %xmm1, %xmm2
8385 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8386 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
8387 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
8388 ; SSSE3-NEXT: psrlw $4, %xmm0
8389 ; SSSE3-NEXT: pand %xmm1, %xmm0
8390 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
8391 ; SSSE3-NEXT: paddb %xmm4, %xmm3
8392 ; SSSE3-NEXT: pxor %xmm0, %xmm0
8393 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
8394 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
8395 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
8396 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
8397 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
8398 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
8399 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9]
8400 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
8403 ; SSE41-LABEL: ult_9_v4i32:
8405 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8406 ; SSE41-NEXT: movdqa %xmm0, %xmm2
8407 ; SSE41-NEXT: pand %xmm1, %xmm2
8408 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8409 ; SSE41-NEXT: movdqa %xmm3, %xmm4
8410 ; SSE41-NEXT: pshufb %xmm2, %xmm4
8411 ; SSE41-NEXT: psrlw $4, %xmm0
8412 ; SSE41-NEXT: pand %xmm1, %xmm0
8413 ; SSE41-NEXT: pshufb %xmm0, %xmm3
8414 ; SSE41-NEXT: paddb %xmm4, %xmm3
8415 ; SSE41-NEXT: pxor %xmm0, %xmm0
8416 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
8417 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
8418 ; SSE41-NEXT: psadbw %xmm0, %xmm3
8419 ; SSE41-NEXT: psadbw %xmm0, %xmm1
8420 ; SSE41-NEXT: packuswb %xmm3, %xmm1
8421 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [9,9,9,9]
8422 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
8425 ; AVX1-LABEL: ult_9_v4i32:
8427 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8428 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
8429 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8430 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8431 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
8432 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
8433 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8434 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8435 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
8436 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8437 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8438 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8439 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8440 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8441 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [9,9,9,9]
8442 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8445 ; AVX2-LABEL: ult_9_v4i32:
8447 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8448 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
8449 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8450 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8451 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
8452 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
8453 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8454 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8455 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
8456 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8457 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8458 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8459 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8460 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8461 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8462 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8465 ; AVX512VPOPCNTDQ-LABEL: ult_9_v4i32:
8466 ; AVX512VPOPCNTDQ: # %bb.0:
8467 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8468 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
8469 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8470 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8471 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
8472 ; AVX512VPOPCNTDQ-NEXT: retq
8474 ; AVX512VPOPCNTDQVL-LABEL: ult_9_v4i32:
8475 ; AVX512VPOPCNTDQVL: # %bb.0:
8476 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
8477 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8478 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8479 ; AVX512VPOPCNTDQVL-NEXT: retq
8481 ; BITALG_NOVLX-LABEL: ult_9_v4i32:
8482 ; BITALG_NOVLX: # %bb.0:
8483 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8484 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
8485 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
8486 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8487 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8488 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8489 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8490 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8491 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8492 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8493 ; BITALG_NOVLX-NEXT: vzeroupper
8494 ; BITALG_NOVLX-NEXT: retq
8496 ; BITALG-LABEL: ult_9_v4i32:
8498 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
8499 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
8500 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8501 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8502 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8503 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8504 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8505 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8506 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8508 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
8509 %3 = icmp ult <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9>
8510 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_9_v4i32 - per-lane test "ctpop(%0) >u 9" on a <4 x i32> vector: the IR
; at the end of this function takes llvm.ctpop.v4i32 of the argument, compares
; it with icmp ugt against splat(9), and sign-extends the <4 x i1> result to
; <4 x i32> (%4).
; The per-target assertion lines are autogenerated (see the note on the first
; line of this file, utils/update_llc_test_checks.py); regenerate them with
; that script instead of editing them by hand.
8514 define <4 x i32> @ugt_9_v4i32(<4 x i32> %0) {
8515 ; SSE2-LABEL: ugt_9_v4i32:
8517 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8518 ; SSE2-NEXT: psrlw $1, %xmm1
8519 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8520 ; SSE2-NEXT: psubb %xmm1, %xmm0
8521 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8522 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8523 ; SSE2-NEXT: pand %xmm1, %xmm2
8524 ; SSE2-NEXT: psrlw $2, %xmm0
8525 ; SSE2-NEXT: pand %xmm1, %xmm0
8526 ; SSE2-NEXT: paddb %xmm2, %xmm0
8527 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8528 ; SSE2-NEXT: psrlw $4, %xmm1
8529 ; SSE2-NEXT: paddb %xmm1, %xmm0
8530 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8531 ; SSE2-NEXT: pxor %xmm1, %xmm1
8532 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8533 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
8534 ; SSE2-NEXT: psadbw %xmm1, %xmm2
8535 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
8536 ; SSE2-NEXT: psadbw %xmm1, %xmm0
8537 ; SSE2-NEXT: packuswb %xmm2, %xmm0
8538 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8541 ; SSE3-LABEL: ugt_9_v4i32:
8543 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8544 ; SSE3-NEXT: psrlw $1, %xmm1
8545 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8546 ; SSE3-NEXT: psubb %xmm1, %xmm0
8547 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8548 ; SSE3-NEXT: movdqa %xmm0, %xmm2
8549 ; SSE3-NEXT: pand %xmm1, %xmm2
8550 ; SSE3-NEXT: psrlw $2, %xmm0
8551 ; SSE3-NEXT: pand %xmm1, %xmm0
8552 ; SSE3-NEXT: paddb %xmm2, %xmm0
8553 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8554 ; SSE3-NEXT: psrlw $4, %xmm1
8555 ; SSE3-NEXT: paddb %xmm1, %xmm0
8556 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8557 ; SSE3-NEXT: pxor %xmm1, %xmm1
8558 ; SSE3-NEXT: movdqa %xmm0, %xmm2
8559 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
8560 ; SSE3-NEXT: psadbw %xmm1, %xmm2
8561 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
8562 ; SSE3-NEXT: psadbw %xmm1, %xmm0
8563 ; SSE3-NEXT: packuswb %xmm2, %xmm0
8564 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8567 ; SSSE3-LABEL: ugt_9_v4i32:
8569 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8570 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
8571 ; SSSE3-NEXT: pand %xmm2, %xmm3
8572 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8573 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
8574 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
8575 ; SSSE3-NEXT: psrlw $4, %xmm0
8576 ; SSSE3-NEXT: pand %xmm2, %xmm0
8577 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
8578 ; SSSE3-NEXT: paddb %xmm4, %xmm1
8579 ; SSSE3-NEXT: pxor %xmm0, %xmm0
8580 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
8581 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8582 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
8583 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8584 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
8585 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
8586 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8587 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
8590 ; SSE41-LABEL: ugt_9_v4i32:
8592 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8593 ; SSE41-NEXT: movdqa %xmm0, %xmm2
8594 ; SSE41-NEXT: pand %xmm1, %xmm2
8595 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8596 ; SSE41-NEXT: movdqa %xmm3, %xmm4
8597 ; SSE41-NEXT: pshufb %xmm2, %xmm4
8598 ; SSE41-NEXT: psrlw $4, %xmm0
8599 ; SSE41-NEXT: pand %xmm1, %xmm0
8600 ; SSE41-NEXT: pshufb %xmm0, %xmm3
8601 ; SSE41-NEXT: paddb %xmm4, %xmm3
8602 ; SSE41-NEXT: pxor %xmm1, %xmm1
8603 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
8604 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
8605 ; SSE41-NEXT: psadbw %xmm1, %xmm3
8606 ; SSE41-NEXT: psadbw %xmm1, %xmm0
8607 ; SSE41-NEXT: packuswb %xmm3, %xmm0
8608 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8611 ; AVX1-LABEL: ugt_9_v4i32:
8613 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8614 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
8615 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8616 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8617 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
8618 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
8619 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8620 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8621 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
8622 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8623 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8624 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8625 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8626 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8627 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
8630 ; AVX2-LABEL: ugt_9_v4i32:
8632 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8633 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
8634 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8635 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8636 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
8637 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
8638 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8639 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8640 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
8641 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8642 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8643 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8644 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8645 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8646 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8647 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8650 ; AVX512VPOPCNTDQ-LABEL: ugt_9_v4i32:
8651 ; AVX512VPOPCNTDQ: # %bb.0:
8652 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8653 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
8654 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8655 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8656 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
8657 ; AVX512VPOPCNTDQ-NEXT: retq
8659 ; AVX512VPOPCNTDQVL-LABEL: ugt_9_v4i32:
8660 ; AVX512VPOPCNTDQVL: # %bb.0:
8661 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
8662 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8663 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8664 ; AVX512VPOPCNTDQVL-NEXT: retq
8666 ; BITALG_NOVLX-LABEL: ugt_9_v4i32:
8667 ; BITALG_NOVLX: # %bb.0:
8668 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8669 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
8670 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
8671 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8672 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8673 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8674 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8675 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8676 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8677 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8678 ; BITALG_NOVLX-NEXT: vzeroupper
8679 ; BITALG_NOVLX-NEXT: retq
8681 ; BITALG-LABEL: ugt_9_v4i32:
8683 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
8684 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
8685 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8686 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8687 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8688 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8689 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8690 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
8691 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
8693 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
8694 %3 = icmp ugt <4 x i32> %2, <i32 9, i32 9, i32 9, i32 9>
8695 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_10_v4i32 - per-lane test "ctpop(%0) <u 10" on a <4 x i32> vector: the IR
; at the end of this function takes llvm.ctpop.v4i32 of the argument, compares
; it with icmp ult against splat(10), and sign-extends the <4 x i1> result to
; <4 x i32> (%4).
; The per-target assertion lines are autogenerated (see the note on the first
; line of this file, utils/update_llc_test_checks.py); regenerate them with
; that script instead of editing them by hand.
8699 define <4 x i32> @ult_10_v4i32(<4 x i32> %0) {
8700 ; SSE2-LABEL: ult_10_v4i32:
8702 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8703 ; SSE2-NEXT: psrlw $1, %xmm1
8704 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8705 ; SSE2-NEXT: psubb %xmm1, %xmm0
8706 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8707 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8708 ; SSE2-NEXT: pand %xmm1, %xmm2
8709 ; SSE2-NEXT: psrlw $2, %xmm0
8710 ; SSE2-NEXT: pand %xmm1, %xmm0
8711 ; SSE2-NEXT: paddb %xmm2, %xmm0
8712 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8713 ; SSE2-NEXT: psrlw $4, %xmm1
8714 ; SSE2-NEXT: paddb %xmm0, %xmm1
8715 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8716 ; SSE2-NEXT: pxor %xmm0, %xmm0
8717 ; SSE2-NEXT: movdqa %xmm1, %xmm2
8718 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8719 ; SSE2-NEXT: psadbw %xmm0, %xmm2
8720 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8721 ; SSE2-NEXT: psadbw %xmm0, %xmm1
8722 ; SSE2-NEXT: packuswb %xmm2, %xmm1
8723 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
8724 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
8727 ; SSE3-LABEL: ult_10_v4i32:
8729 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8730 ; SSE3-NEXT: psrlw $1, %xmm1
8731 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8732 ; SSE3-NEXT: psubb %xmm1, %xmm0
8733 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8734 ; SSE3-NEXT: movdqa %xmm0, %xmm2
8735 ; SSE3-NEXT: pand %xmm1, %xmm2
8736 ; SSE3-NEXT: psrlw $2, %xmm0
8737 ; SSE3-NEXT: pand %xmm1, %xmm0
8738 ; SSE3-NEXT: paddb %xmm2, %xmm0
8739 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8740 ; SSE3-NEXT: psrlw $4, %xmm1
8741 ; SSE3-NEXT: paddb %xmm0, %xmm1
8742 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8743 ; SSE3-NEXT: pxor %xmm0, %xmm0
8744 ; SSE3-NEXT: movdqa %xmm1, %xmm2
8745 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8746 ; SSE3-NEXT: psadbw %xmm0, %xmm2
8747 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8748 ; SSE3-NEXT: psadbw %xmm0, %xmm1
8749 ; SSE3-NEXT: packuswb %xmm2, %xmm1
8750 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
8751 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
8754 ; SSSE3-LABEL: ult_10_v4i32:
8756 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8757 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
8758 ; SSSE3-NEXT: pand %xmm1, %xmm2
8759 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8760 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
8761 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
8762 ; SSSE3-NEXT: psrlw $4, %xmm0
8763 ; SSSE3-NEXT: pand %xmm1, %xmm0
8764 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
8765 ; SSSE3-NEXT: paddb %xmm4, %xmm3
8766 ; SSSE3-NEXT: pxor %xmm0, %xmm0
8767 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
8768 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
8769 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
8770 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
8771 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
8772 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
8773 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
8774 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
8777 ; SSE41-LABEL: ult_10_v4i32:
8779 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8780 ; SSE41-NEXT: movdqa %xmm0, %xmm2
8781 ; SSE41-NEXT: pand %xmm1, %xmm2
8782 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8783 ; SSE41-NEXT: movdqa %xmm3, %xmm4
8784 ; SSE41-NEXT: pshufb %xmm2, %xmm4
8785 ; SSE41-NEXT: psrlw $4, %xmm0
8786 ; SSE41-NEXT: pand %xmm1, %xmm0
8787 ; SSE41-NEXT: pshufb %xmm0, %xmm3
8788 ; SSE41-NEXT: paddb %xmm4, %xmm3
8789 ; SSE41-NEXT: pxor %xmm0, %xmm0
8790 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
8791 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
8792 ; SSE41-NEXT: psadbw %xmm0, %xmm3
8793 ; SSE41-NEXT: psadbw %xmm0, %xmm1
8794 ; SSE41-NEXT: packuswb %xmm3, %xmm1
8795 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [10,10,10,10]
8796 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
8799 ; AVX1-LABEL: ult_10_v4i32:
8801 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8802 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
8803 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8804 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8805 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
8806 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
8807 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8808 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8809 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
8810 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8811 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8812 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8813 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8814 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8815 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [10,10,10,10]
8816 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8819 ; AVX2-LABEL: ult_10_v4i32:
8821 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8822 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
8823 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8824 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8825 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
8826 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
8827 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8828 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8829 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
8830 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8831 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8832 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8833 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8834 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8835 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
8836 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8839 ; AVX512VPOPCNTDQ-LABEL: ult_10_v4i32:
8840 ; AVX512VPOPCNTDQ: # %bb.0:
8841 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8842 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
8843 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
8844 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8845 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
8846 ; AVX512VPOPCNTDQ-NEXT: retq
8848 ; AVX512VPOPCNTDQVL-LABEL: ult_10_v4i32:
8849 ; AVX512VPOPCNTDQVL: # %bb.0:
8850 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
8851 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
8852 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8853 ; AVX512VPOPCNTDQVL-NEXT: retq
8855 ; BITALG_NOVLX-LABEL: ult_10_v4i32:
8856 ; BITALG_NOVLX: # %bb.0:
8857 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
8858 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
8859 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
8860 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8861 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8862 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8863 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8864 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8865 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
8866 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8867 ; BITALG_NOVLX-NEXT: vzeroupper
8868 ; BITALG_NOVLX-NEXT: retq
8870 ; BITALG-LABEL: ult_10_v4i32:
8872 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
8873 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
8874 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8875 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8876 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8877 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
8878 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
8879 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
8880 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
8882 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
8883 %3 = icmp ult <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10>
8884 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_10_v4i32 - per-lane test "ctpop(%0) >u 10" on a <4 x i32> vector: the IR
; at the end of this function takes llvm.ctpop.v4i32 of the argument, compares
; it with icmp ugt against splat(10), and sign-extends the <4 x i1> result to
; <4 x i32> (%4).
; The per-target assertion lines are autogenerated (see the note on the first
; line of this file, utils/update_llc_test_checks.py); regenerate them with
; that script instead of editing them by hand.
8888 define <4 x i32> @ugt_10_v4i32(<4 x i32> %0) {
8889 ; SSE2-LABEL: ugt_10_v4i32:
8891 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8892 ; SSE2-NEXT: psrlw $1, %xmm1
8893 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8894 ; SSE2-NEXT: psubb %xmm1, %xmm0
8895 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8896 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8897 ; SSE2-NEXT: pand %xmm1, %xmm2
8898 ; SSE2-NEXT: psrlw $2, %xmm0
8899 ; SSE2-NEXT: pand %xmm1, %xmm0
8900 ; SSE2-NEXT: paddb %xmm2, %xmm0
8901 ; SSE2-NEXT: movdqa %xmm0, %xmm1
8902 ; SSE2-NEXT: psrlw $4, %xmm1
8903 ; SSE2-NEXT: paddb %xmm1, %xmm0
8904 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8905 ; SSE2-NEXT: pxor %xmm1, %xmm1
8906 ; SSE2-NEXT: movdqa %xmm0, %xmm2
8907 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
8908 ; SSE2-NEXT: psadbw %xmm1, %xmm2
8909 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
8910 ; SSE2-NEXT: psadbw %xmm1, %xmm0
8911 ; SSE2-NEXT: packuswb %xmm2, %xmm0
8912 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8915 ; SSE3-LABEL: ugt_10_v4i32:
8917 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8918 ; SSE3-NEXT: psrlw $1, %xmm1
8919 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8920 ; SSE3-NEXT: psubb %xmm1, %xmm0
8921 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
8922 ; SSE3-NEXT: movdqa %xmm0, %xmm2
8923 ; SSE3-NEXT: pand %xmm1, %xmm2
8924 ; SSE3-NEXT: psrlw $2, %xmm0
8925 ; SSE3-NEXT: pand %xmm1, %xmm0
8926 ; SSE3-NEXT: paddb %xmm2, %xmm0
8927 ; SSE3-NEXT: movdqa %xmm0, %xmm1
8928 ; SSE3-NEXT: psrlw $4, %xmm1
8929 ; SSE3-NEXT: paddb %xmm1, %xmm0
8930 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8931 ; SSE3-NEXT: pxor %xmm1, %xmm1
8932 ; SSE3-NEXT: movdqa %xmm0, %xmm2
8933 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
8934 ; SSE3-NEXT: psadbw %xmm1, %xmm2
8935 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
8936 ; SSE3-NEXT: psadbw %xmm1, %xmm0
8937 ; SSE3-NEXT: packuswb %xmm2, %xmm0
8938 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8941 ; SSSE3-LABEL: ugt_10_v4i32:
8943 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8944 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
8945 ; SSSE3-NEXT: pand %xmm2, %xmm3
8946 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8947 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
8948 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
8949 ; SSSE3-NEXT: psrlw $4, %xmm0
8950 ; SSSE3-NEXT: pand %xmm2, %xmm0
8951 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
8952 ; SSSE3-NEXT: paddb %xmm4, %xmm1
8953 ; SSSE3-NEXT: pxor %xmm0, %xmm0
8954 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
8955 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
8956 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
8957 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
8958 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
8959 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
8960 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
8961 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
8964 ; SSE41-LABEL: ugt_10_v4i32:
8966 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8967 ; SSE41-NEXT: movdqa %xmm0, %xmm2
8968 ; SSE41-NEXT: pand %xmm1, %xmm2
8969 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8970 ; SSE41-NEXT: movdqa %xmm3, %xmm4
8971 ; SSE41-NEXT: pshufb %xmm2, %xmm4
8972 ; SSE41-NEXT: psrlw $4, %xmm0
8973 ; SSE41-NEXT: pand %xmm1, %xmm0
8974 ; SSE41-NEXT: pshufb %xmm0, %xmm3
8975 ; SSE41-NEXT: paddb %xmm4, %xmm3
8976 ; SSE41-NEXT: pxor %xmm1, %xmm1
8977 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
8978 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
8979 ; SSE41-NEXT: psadbw %xmm1, %xmm3
8980 ; SSE41-NEXT: psadbw %xmm1, %xmm0
8981 ; SSE41-NEXT: packuswb %xmm3, %xmm0
8982 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8985 ; AVX1-LABEL: ugt_10_v4i32:
8987 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
8988 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
8989 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
8990 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
8991 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
8992 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
8993 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
8994 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
8995 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
8996 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
8997 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
8998 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
8999 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9000 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9001 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
9004 ; AVX2-LABEL: ugt_10_v4i32:
9006 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9007 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
9008 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9009 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9010 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
9011 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
9012 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9013 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9014 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
9015 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9016 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9017 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9018 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9019 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9020 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
9021 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9024 ; AVX512VPOPCNTDQ-LABEL: ugt_10_v4i32:
9025 ; AVX512VPOPCNTDQ: # %bb.0:
9026 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9027 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
9028 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
9029 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9030 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
9031 ; AVX512VPOPCNTDQ-NEXT: retq
9033 ; AVX512VPOPCNTDQVL-LABEL: ugt_10_v4i32:
9034 ; AVX512VPOPCNTDQVL: # %bb.0:
9035 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
9036 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
9037 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9038 ; AVX512VPOPCNTDQVL-NEXT: retq
9040 ; BITALG_NOVLX-LABEL: ugt_10_v4i32:
9041 ; BITALG_NOVLX: # %bb.0:
9042 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9043 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
9044 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
9045 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9046 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9047 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9048 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9049 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9050 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
9051 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9052 ; BITALG_NOVLX-NEXT: vzeroupper
9053 ; BITALG_NOVLX-NEXT: retq
9055 ; BITALG-LABEL: ugt_10_v4i32:
9057 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
9058 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
9059 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9060 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9061 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9062 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9063 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9064 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [10,10,10,10]
9065 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9067 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
9068 %3 = icmp ugt <4 x i32> %2, <i32 10, i32 10, i32 10, i32 10>
9069 %4 = sext <4 x i1> %3 to <4 x i32>
9073 define <4 x i32> @ult_11_v4i32(<4 x i32> %0) {
9074 ; SSE2-LABEL: ult_11_v4i32:
9076 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9077 ; SSE2-NEXT: psrlw $1, %xmm1
9078 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9079 ; SSE2-NEXT: psubb %xmm1, %xmm0
9080 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9081 ; SSE2-NEXT: movdqa %xmm0, %xmm2
9082 ; SSE2-NEXT: pand %xmm1, %xmm2
9083 ; SSE2-NEXT: psrlw $2, %xmm0
9084 ; SSE2-NEXT: pand %xmm1, %xmm0
9085 ; SSE2-NEXT: paddb %xmm2, %xmm0
9086 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9087 ; SSE2-NEXT: psrlw $4, %xmm1
9088 ; SSE2-NEXT: paddb %xmm0, %xmm1
9089 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9090 ; SSE2-NEXT: pxor %xmm0, %xmm0
9091 ; SSE2-NEXT: movdqa %xmm1, %xmm2
9092 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9093 ; SSE2-NEXT: psadbw %xmm0, %xmm2
9094 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9095 ; SSE2-NEXT: psadbw %xmm0, %xmm1
9096 ; SSE2-NEXT: packuswb %xmm2, %xmm1
9097 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11]
9098 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
9101 ; SSE3-LABEL: ult_11_v4i32:
9103 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9104 ; SSE3-NEXT: psrlw $1, %xmm1
9105 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9106 ; SSE3-NEXT: psubb %xmm1, %xmm0
9107 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9108 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9109 ; SSE3-NEXT: pand %xmm1, %xmm2
9110 ; SSE3-NEXT: psrlw $2, %xmm0
9111 ; SSE3-NEXT: pand %xmm1, %xmm0
9112 ; SSE3-NEXT: paddb %xmm2, %xmm0
9113 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9114 ; SSE3-NEXT: psrlw $4, %xmm1
9115 ; SSE3-NEXT: paddb %xmm0, %xmm1
9116 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9117 ; SSE3-NEXT: pxor %xmm0, %xmm0
9118 ; SSE3-NEXT: movdqa %xmm1, %xmm2
9119 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9120 ; SSE3-NEXT: psadbw %xmm0, %xmm2
9121 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9122 ; SSE3-NEXT: psadbw %xmm0, %xmm1
9123 ; SSE3-NEXT: packuswb %xmm2, %xmm1
9124 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11]
9125 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
9128 ; SSSE3-LABEL: ult_11_v4i32:
9130 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9131 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
9132 ; SSSE3-NEXT: pand %xmm1, %xmm2
9133 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9134 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
9135 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
9136 ; SSSE3-NEXT: psrlw $4, %xmm0
9137 ; SSSE3-NEXT: pand %xmm1, %xmm0
9138 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
9139 ; SSSE3-NEXT: paddb %xmm4, %xmm3
9140 ; SSSE3-NEXT: pxor %xmm0, %xmm0
9141 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
9142 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
9143 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
9144 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
9145 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
9146 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
9147 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11]
9148 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
9151 ; SSE41-LABEL: ult_11_v4i32:
9153 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9154 ; SSE41-NEXT: movdqa %xmm0, %xmm2
9155 ; SSE41-NEXT: pand %xmm1, %xmm2
9156 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9157 ; SSE41-NEXT: movdqa %xmm3, %xmm4
9158 ; SSE41-NEXT: pshufb %xmm2, %xmm4
9159 ; SSE41-NEXT: psrlw $4, %xmm0
9160 ; SSE41-NEXT: pand %xmm1, %xmm0
9161 ; SSE41-NEXT: pshufb %xmm0, %xmm3
9162 ; SSE41-NEXT: paddb %xmm4, %xmm3
9163 ; SSE41-NEXT: pxor %xmm0, %xmm0
9164 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
9165 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
9166 ; SSE41-NEXT: psadbw %xmm0, %xmm3
9167 ; SSE41-NEXT: psadbw %xmm0, %xmm1
9168 ; SSE41-NEXT: packuswb %xmm3, %xmm1
9169 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [11,11,11,11]
9170 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
9173 ; AVX1-LABEL: ult_11_v4i32:
9175 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9176 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
9177 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9178 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9179 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
9180 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
9181 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9182 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9183 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
9184 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9185 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9186 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9187 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9188 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9189 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [11,11,11,11]
9190 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9193 ; AVX2-LABEL: ult_11_v4i32:
9195 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9196 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
9197 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9198 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9199 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
9200 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
9201 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9202 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9203 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
9204 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9205 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9206 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9207 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9208 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9209 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9210 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9213 ; AVX512VPOPCNTDQ-LABEL: ult_11_v4i32:
9214 ; AVX512VPOPCNTDQ: # %bb.0:
9215 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9216 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
9217 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9218 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9219 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
9220 ; AVX512VPOPCNTDQ-NEXT: retq
9222 ; AVX512VPOPCNTDQVL-LABEL: ult_11_v4i32:
9223 ; AVX512VPOPCNTDQVL: # %bb.0:
9224 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
9225 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9226 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9227 ; AVX512VPOPCNTDQVL-NEXT: retq
9229 ; BITALG_NOVLX-LABEL: ult_11_v4i32:
9230 ; BITALG_NOVLX: # %bb.0:
9231 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9232 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
9233 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
9234 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9235 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9236 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9237 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9238 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9239 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9240 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9241 ; BITALG_NOVLX-NEXT: vzeroupper
9242 ; BITALG_NOVLX-NEXT: retq
9244 ; BITALG-LABEL: ult_11_v4i32:
9246 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
9247 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
9248 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9249 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9250 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9251 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9252 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9253 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9254 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9256 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
9257 %3 = icmp ult <4 x i32> %2, <i32 11, i32 11, i32 11, i32 11>
9258 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_11_v4i32: for each i32 lane of %0, computes the population count,
; compares it unsigned-greater-than against a splat of 11, and sign-extends
; the <4 x i1> compare result to <4 x i32>.
; The check lines in this function are autogenerated (see the note at the top
; of the file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
9262 define <4 x i32> @ugt_11_v4i32(<4 x i32> %0) {
9263 ; SSE2-LABEL: ugt_11_v4i32:
9265 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9266 ; SSE2-NEXT: psrlw $1, %xmm1
9267 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9268 ; SSE2-NEXT: psubb %xmm1, %xmm0
9269 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9270 ; SSE2-NEXT: movdqa %xmm0, %xmm2
9271 ; SSE2-NEXT: pand %xmm1, %xmm2
9272 ; SSE2-NEXT: psrlw $2, %xmm0
9273 ; SSE2-NEXT: pand %xmm1, %xmm0
9274 ; SSE2-NEXT: paddb %xmm2, %xmm0
9275 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9276 ; SSE2-NEXT: psrlw $4, %xmm1
9277 ; SSE2-NEXT: paddb %xmm1, %xmm0
9278 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9279 ; SSE2-NEXT: pxor %xmm1, %xmm1
9280 ; SSE2-NEXT: movdqa %xmm0, %xmm2
9281 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
9282 ; SSE2-NEXT: psadbw %xmm1, %xmm2
9283 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
9284 ; SSE2-NEXT: psadbw %xmm1, %xmm0
9285 ; SSE2-NEXT: packuswb %xmm2, %xmm0
9286 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9289 ; SSE3-LABEL: ugt_11_v4i32:
9291 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9292 ; SSE3-NEXT: psrlw $1, %xmm1
9293 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9294 ; SSE3-NEXT: psubb %xmm1, %xmm0
9295 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9296 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9297 ; SSE3-NEXT: pand %xmm1, %xmm2
9298 ; SSE3-NEXT: psrlw $2, %xmm0
9299 ; SSE3-NEXT: pand %xmm1, %xmm0
9300 ; SSE3-NEXT: paddb %xmm2, %xmm0
9301 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9302 ; SSE3-NEXT: psrlw $4, %xmm1
9303 ; SSE3-NEXT: paddb %xmm1, %xmm0
9304 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9305 ; SSE3-NEXT: pxor %xmm1, %xmm1
9306 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9307 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
9308 ; SSE3-NEXT: psadbw %xmm1, %xmm2
9309 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
9310 ; SSE3-NEXT: psadbw %xmm1, %xmm0
9311 ; SSE3-NEXT: packuswb %xmm2, %xmm0
9312 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9315 ; SSSE3-LABEL: ugt_11_v4i32:
9317 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9318 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
9319 ; SSSE3-NEXT: pand %xmm2, %xmm3
9320 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9321 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
9322 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
9323 ; SSSE3-NEXT: psrlw $4, %xmm0
9324 ; SSSE3-NEXT: pand %xmm2, %xmm0
9325 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
9326 ; SSSE3-NEXT: paddb %xmm4, %xmm1
9327 ; SSSE3-NEXT: pxor %xmm0, %xmm0
9328 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
9329 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9330 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
9331 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9332 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
9333 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
9334 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9335 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
9338 ; SSE41-LABEL: ugt_11_v4i32:
9340 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9341 ; SSE41-NEXT: movdqa %xmm0, %xmm2
9342 ; SSE41-NEXT: pand %xmm1, %xmm2
9343 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9344 ; SSE41-NEXT: movdqa %xmm3, %xmm4
9345 ; SSE41-NEXT: pshufb %xmm2, %xmm4
9346 ; SSE41-NEXT: psrlw $4, %xmm0
9347 ; SSE41-NEXT: pand %xmm1, %xmm0
9348 ; SSE41-NEXT: pshufb %xmm0, %xmm3
9349 ; SSE41-NEXT: paddb %xmm4, %xmm3
9350 ; SSE41-NEXT: pxor %xmm1, %xmm1
9351 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
9352 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
9353 ; SSE41-NEXT: psadbw %xmm1, %xmm3
9354 ; SSE41-NEXT: psadbw %xmm1, %xmm0
9355 ; SSE41-NEXT: packuswb %xmm3, %xmm0
9356 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9359 ; AVX1-LABEL: ugt_11_v4i32:
9361 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9362 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
9363 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9364 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9365 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
9366 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
9367 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9368 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9369 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
9370 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9371 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9372 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9373 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9374 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9375 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
9378 ; AVX2-LABEL: ugt_11_v4i32:
9380 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9381 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
9382 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9383 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9384 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
9385 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
9386 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9387 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9388 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
9389 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9390 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9391 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9392 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9393 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9394 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9395 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9398 ; AVX512VPOPCNTDQ-LABEL: ugt_11_v4i32:
9399 ; AVX512VPOPCNTDQ: # %bb.0:
9400 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9401 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
9402 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9403 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9404 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
9405 ; AVX512VPOPCNTDQ-NEXT: retq
9407 ; AVX512VPOPCNTDQVL-LABEL: ugt_11_v4i32:
9408 ; AVX512VPOPCNTDQVL: # %bb.0:
9409 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
9410 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9411 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9412 ; AVX512VPOPCNTDQVL-NEXT: retq
9414 ; BITALG_NOVLX-LABEL: ugt_11_v4i32:
9415 ; BITALG_NOVLX: # %bb.0:
9416 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9417 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
9418 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
9419 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9420 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9421 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9422 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9423 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9424 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9425 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9426 ; BITALG_NOVLX-NEXT: vzeroupper
9427 ; BITALG_NOVLX-NEXT: retq
9429 ; BITALG-LABEL: ugt_11_v4i32:
9431 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
9432 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
9433 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9434 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9435 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9436 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9437 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9438 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [11,11,11,11]
9439 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; Count the set bits in each i32 lane of the argument.
9441 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
; Lane-wise unsigned compare: true where the bit count is greater than 11.
9442 %3 = icmp ugt <4 x i32> %2, <i32 11, i32 11, i32 11, i32 11>
; Sign-extend each i1 so a true lane becomes all-ones and a false lane zero.
9443 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_12_v4i32: for each i32 lane of %0, computes the population count,
; compares it unsigned-less-than against a splat of 12, and sign-extends
; the <4 x i1> compare result to <4 x i32>.
; The check lines in this function are autogenerated (see the note at the top
; of the file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
9447 define <4 x i32> @ult_12_v4i32(<4 x i32> %0) {
9448 ; SSE2-LABEL: ult_12_v4i32:
9450 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9451 ; SSE2-NEXT: psrlw $1, %xmm1
9452 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9453 ; SSE2-NEXT: psubb %xmm1, %xmm0
9454 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9455 ; SSE2-NEXT: movdqa %xmm0, %xmm2
9456 ; SSE2-NEXT: pand %xmm1, %xmm2
9457 ; SSE2-NEXT: psrlw $2, %xmm0
9458 ; SSE2-NEXT: pand %xmm1, %xmm0
9459 ; SSE2-NEXT: paddb %xmm2, %xmm0
9460 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9461 ; SSE2-NEXT: psrlw $4, %xmm1
9462 ; SSE2-NEXT: paddb %xmm0, %xmm1
9463 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9464 ; SSE2-NEXT: pxor %xmm0, %xmm0
9465 ; SSE2-NEXT: movdqa %xmm1, %xmm2
9466 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9467 ; SSE2-NEXT: psadbw %xmm0, %xmm2
9468 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9469 ; SSE2-NEXT: psadbw %xmm0, %xmm1
9470 ; SSE2-NEXT: packuswb %xmm2, %xmm1
9471 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12]
9472 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
9475 ; SSE3-LABEL: ult_12_v4i32:
9477 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9478 ; SSE3-NEXT: psrlw $1, %xmm1
9479 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9480 ; SSE3-NEXT: psubb %xmm1, %xmm0
9481 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9482 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9483 ; SSE3-NEXT: pand %xmm1, %xmm2
9484 ; SSE3-NEXT: psrlw $2, %xmm0
9485 ; SSE3-NEXT: pand %xmm1, %xmm0
9486 ; SSE3-NEXT: paddb %xmm2, %xmm0
9487 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9488 ; SSE3-NEXT: psrlw $4, %xmm1
9489 ; SSE3-NEXT: paddb %xmm0, %xmm1
9490 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9491 ; SSE3-NEXT: pxor %xmm0, %xmm0
9492 ; SSE3-NEXT: movdqa %xmm1, %xmm2
9493 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9494 ; SSE3-NEXT: psadbw %xmm0, %xmm2
9495 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9496 ; SSE3-NEXT: psadbw %xmm0, %xmm1
9497 ; SSE3-NEXT: packuswb %xmm2, %xmm1
9498 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12]
9499 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
9502 ; SSSE3-LABEL: ult_12_v4i32:
9504 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9505 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
9506 ; SSSE3-NEXT: pand %xmm1, %xmm2
9507 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9508 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
9509 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
9510 ; SSSE3-NEXT: psrlw $4, %xmm0
9511 ; SSSE3-NEXT: pand %xmm1, %xmm0
9512 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
9513 ; SSSE3-NEXT: paddb %xmm4, %xmm3
9514 ; SSSE3-NEXT: pxor %xmm0, %xmm0
9515 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
9516 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
9517 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
9518 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
9519 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
9520 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
9521 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12]
9522 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
9525 ; SSE41-LABEL: ult_12_v4i32:
9527 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9528 ; SSE41-NEXT: movdqa %xmm0, %xmm2
9529 ; SSE41-NEXT: pand %xmm1, %xmm2
9530 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9531 ; SSE41-NEXT: movdqa %xmm3, %xmm4
9532 ; SSE41-NEXT: pshufb %xmm2, %xmm4
9533 ; SSE41-NEXT: psrlw $4, %xmm0
9534 ; SSE41-NEXT: pand %xmm1, %xmm0
9535 ; SSE41-NEXT: pshufb %xmm0, %xmm3
9536 ; SSE41-NEXT: paddb %xmm4, %xmm3
9537 ; SSE41-NEXT: pxor %xmm0, %xmm0
9538 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
9539 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
9540 ; SSE41-NEXT: psadbw %xmm0, %xmm3
9541 ; SSE41-NEXT: psadbw %xmm0, %xmm1
9542 ; SSE41-NEXT: packuswb %xmm3, %xmm1
9543 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [12,12,12,12]
9544 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
9547 ; AVX1-LABEL: ult_12_v4i32:
9549 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9550 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
9551 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9552 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9553 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
9554 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
9555 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9556 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9557 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
9558 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9559 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9560 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9561 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9562 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9563 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [12,12,12,12]
9564 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9567 ; AVX2-LABEL: ult_12_v4i32:
9569 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9570 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
9571 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9572 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9573 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
9574 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
9575 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9576 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9577 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
9578 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9579 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9580 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9581 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9582 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9583 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9584 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9587 ; AVX512VPOPCNTDQ-LABEL: ult_12_v4i32:
9588 ; AVX512VPOPCNTDQ: # %bb.0:
9589 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9590 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
9591 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9592 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9593 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
9594 ; AVX512VPOPCNTDQ-NEXT: retq
9596 ; AVX512VPOPCNTDQVL-LABEL: ult_12_v4i32:
9597 ; AVX512VPOPCNTDQVL: # %bb.0:
9598 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
9599 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9600 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9601 ; AVX512VPOPCNTDQVL-NEXT: retq
9603 ; BITALG_NOVLX-LABEL: ult_12_v4i32:
9604 ; BITALG_NOVLX: # %bb.0:
9605 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9606 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
9607 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
9608 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9609 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9610 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9611 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9612 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9613 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9614 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9615 ; BITALG_NOVLX-NEXT: vzeroupper
9616 ; BITALG_NOVLX-NEXT: retq
9618 ; BITALG-LABEL: ult_12_v4i32:
9620 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
9621 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
9622 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9623 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9624 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9625 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9626 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9627 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9628 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; Count the set bits in each i32 lane of the argument.
9630 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
; Lane-wise unsigned compare: true where the bit count is less than 12.
9631 %3 = icmp ult <4 x i32> %2, <i32 12, i32 12, i32 12, i32 12>
; Sign-extend each i1 so a true lane becomes all-ones and a false lane zero.
9632 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_12_v4i32: for each i32 lane of %0, computes the population count,
; compares it unsigned-greater-than against a splat of 12, and sign-extends
; the <4 x i1> compare result to <4 x i32>.
; The check lines in this function are autogenerated (see the note at the top
; of the file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
9636 define <4 x i32> @ugt_12_v4i32(<4 x i32> %0) {
9637 ; SSE2-LABEL: ugt_12_v4i32:
9639 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9640 ; SSE2-NEXT: psrlw $1, %xmm1
9641 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9642 ; SSE2-NEXT: psubb %xmm1, %xmm0
9643 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9644 ; SSE2-NEXT: movdqa %xmm0, %xmm2
9645 ; SSE2-NEXT: pand %xmm1, %xmm2
9646 ; SSE2-NEXT: psrlw $2, %xmm0
9647 ; SSE2-NEXT: pand %xmm1, %xmm0
9648 ; SSE2-NEXT: paddb %xmm2, %xmm0
9649 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9650 ; SSE2-NEXT: psrlw $4, %xmm1
9651 ; SSE2-NEXT: paddb %xmm1, %xmm0
9652 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9653 ; SSE2-NEXT: pxor %xmm1, %xmm1
9654 ; SSE2-NEXT: movdqa %xmm0, %xmm2
9655 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
9656 ; SSE2-NEXT: psadbw %xmm1, %xmm2
9657 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
9658 ; SSE2-NEXT: psadbw %xmm1, %xmm0
9659 ; SSE2-NEXT: packuswb %xmm2, %xmm0
9660 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9663 ; SSE3-LABEL: ugt_12_v4i32:
9665 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9666 ; SSE3-NEXT: psrlw $1, %xmm1
9667 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9668 ; SSE3-NEXT: psubb %xmm1, %xmm0
9669 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9670 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9671 ; SSE3-NEXT: pand %xmm1, %xmm2
9672 ; SSE3-NEXT: psrlw $2, %xmm0
9673 ; SSE3-NEXT: pand %xmm1, %xmm0
9674 ; SSE3-NEXT: paddb %xmm2, %xmm0
9675 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9676 ; SSE3-NEXT: psrlw $4, %xmm1
9677 ; SSE3-NEXT: paddb %xmm1, %xmm0
9678 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9679 ; SSE3-NEXT: pxor %xmm1, %xmm1
9680 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9681 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
9682 ; SSE3-NEXT: psadbw %xmm1, %xmm2
9683 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
9684 ; SSE3-NEXT: psadbw %xmm1, %xmm0
9685 ; SSE3-NEXT: packuswb %xmm2, %xmm0
9686 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9689 ; SSSE3-LABEL: ugt_12_v4i32:
9691 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9692 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
9693 ; SSSE3-NEXT: pand %xmm2, %xmm3
9694 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9695 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
9696 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
9697 ; SSSE3-NEXT: psrlw $4, %xmm0
9698 ; SSSE3-NEXT: pand %xmm2, %xmm0
9699 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
9700 ; SSSE3-NEXT: paddb %xmm4, %xmm1
9701 ; SSSE3-NEXT: pxor %xmm0, %xmm0
9702 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
9703 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9704 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
9705 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9706 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
9707 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
9708 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9709 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
9712 ; SSE41-LABEL: ugt_12_v4i32:
9714 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9715 ; SSE41-NEXT: movdqa %xmm0, %xmm2
9716 ; SSE41-NEXT: pand %xmm1, %xmm2
9717 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9718 ; SSE41-NEXT: movdqa %xmm3, %xmm4
9719 ; SSE41-NEXT: pshufb %xmm2, %xmm4
9720 ; SSE41-NEXT: psrlw $4, %xmm0
9721 ; SSE41-NEXT: pand %xmm1, %xmm0
9722 ; SSE41-NEXT: pshufb %xmm0, %xmm3
9723 ; SSE41-NEXT: paddb %xmm4, %xmm3
9724 ; SSE41-NEXT: pxor %xmm1, %xmm1
9725 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
9726 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
9727 ; SSE41-NEXT: psadbw %xmm1, %xmm3
9728 ; SSE41-NEXT: psadbw %xmm1, %xmm0
9729 ; SSE41-NEXT: packuswb %xmm3, %xmm0
9730 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9733 ; AVX1-LABEL: ugt_12_v4i32:
9735 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9736 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
9737 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9738 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9739 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
9740 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
9741 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9742 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9743 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
9744 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9745 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9746 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9747 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9748 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9749 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
9752 ; AVX2-LABEL: ugt_12_v4i32:
9754 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9755 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
9756 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9757 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9758 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
9759 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
9760 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9761 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9762 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
9763 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9764 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9765 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9766 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9767 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9768 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9769 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9772 ; AVX512VPOPCNTDQ-LABEL: ugt_12_v4i32:
9773 ; AVX512VPOPCNTDQ: # %bb.0:
9774 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9775 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
9776 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9777 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9778 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
9779 ; AVX512VPOPCNTDQ-NEXT: retq
9781 ; AVX512VPOPCNTDQVL-LABEL: ugt_12_v4i32:
9782 ; AVX512VPOPCNTDQVL: # %bb.0:
9783 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
9784 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9785 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9786 ; AVX512VPOPCNTDQVL-NEXT: retq
9788 ; BITALG_NOVLX-LABEL: ugt_12_v4i32:
9789 ; BITALG_NOVLX: # %bb.0:
9790 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9791 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
9792 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
9793 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9794 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9795 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9796 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9797 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9798 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9799 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
9800 ; BITALG_NOVLX-NEXT: vzeroupper
9801 ; BITALG_NOVLX-NEXT: retq
9803 ; BITALG-LABEL: ugt_12_v4i32:
9805 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
9806 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
9807 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9808 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9809 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9810 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9811 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9812 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
9813 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; Count the set bits in each i32 lane of the argument.
9815 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
; Lane-wise unsigned compare: true where the bit count is greater than 12.
9816 %3 = icmp ugt <4 x i32> %2, <i32 12, i32 12, i32 12, i32 12>
; Sign-extend each i1 so a true lane becomes all-ones and a false lane zero.
9817 %4 = sext <4 x i1> %3 to <4 x i32>
9821 define <4 x i32> @ult_13_v4i32(<4 x i32> %0) {
9822 ; SSE2-LABEL: ult_13_v4i32:
9824 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9825 ; SSE2-NEXT: psrlw $1, %xmm1
9826 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9827 ; SSE2-NEXT: psubb %xmm1, %xmm0
9828 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9829 ; SSE2-NEXT: movdqa %xmm0, %xmm2
9830 ; SSE2-NEXT: pand %xmm1, %xmm2
9831 ; SSE2-NEXT: psrlw $2, %xmm0
9832 ; SSE2-NEXT: pand %xmm1, %xmm0
9833 ; SSE2-NEXT: paddb %xmm2, %xmm0
9834 ; SSE2-NEXT: movdqa %xmm0, %xmm1
9835 ; SSE2-NEXT: psrlw $4, %xmm1
9836 ; SSE2-NEXT: paddb %xmm0, %xmm1
9837 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9838 ; SSE2-NEXT: pxor %xmm0, %xmm0
9839 ; SSE2-NEXT: movdqa %xmm1, %xmm2
9840 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9841 ; SSE2-NEXT: psadbw %xmm0, %xmm2
9842 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9843 ; SSE2-NEXT: psadbw %xmm0, %xmm1
9844 ; SSE2-NEXT: packuswb %xmm2, %xmm1
9845 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13]
9846 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
9849 ; SSE3-LABEL: ult_13_v4i32:
9851 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9852 ; SSE3-NEXT: psrlw $1, %xmm1
9853 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9854 ; SSE3-NEXT: psubb %xmm1, %xmm0
9855 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
9856 ; SSE3-NEXT: movdqa %xmm0, %xmm2
9857 ; SSE3-NEXT: pand %xmm1, %xmm2
9858 ; SSE3-NEXT: psrlw $2, %xmm0
9859 ; SSE3-NEXT: pand %xmm1, %xmm0
9860 ; SSE3-NEXT: paddb %xmm2, %xmm0
9861 ; SSE3-NEXT: movdqa %xmm0, %xmm1
9862 ; SSE3-NEXT: psrlw $4, %xmm1
9863 ; SSE3-NEXT: paddb %xmm0, %xmm1
9864 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
9865 ; SSE3-NEXT: pxor %xmm0, %xmm0
9866 ; SSE3-NEXT: movdqa %xmm1, %xmm2
9867 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
9868 ; SSE3-NEXT: psadbw %xmm0, %xmm2
9869 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
9870 ; SSE3-NEXT: psadbw %xmm0, %xmm1
9871 ; SSE3-NEXT: packuswb %xmm2, %xmm1
9872 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13]
9873 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
9876 ; SSSE3-LABEL: ult_13_v4i32:
9878 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9879 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
9880 ; SSSE3-NEXT: pand %xmm1, %xmm2
9881 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9882 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
9883 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
9884 ; SSSE3-NEXT: psrlw $4, %xmm0
9885 ; SSSE3-NEXT: pand %xmm1, %xmm0
9886 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
9887 ; SSSE3-NEXT: paddb %xmm4, %xmm3
9888 ; SSSE3-NEXT: pxor %xmm0, %xmm0
9889 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
9890 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
9891 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
9892 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
9893 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
9894 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
9895 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13]
9896 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
9899 ; SSE41-LABEL: ult_13_v4i32:
9901 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9902 ; SSE41-NEXT: movdqa %xmm0, %xmm2
9903 ; SSE41-NEXT: pand %xmm1, %xmm2
9904 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9905 ; SSE41-NEXT: movdqa %xmm3, %xmm4
9906 ; SSE41-NEXT: pshufb %xmm2, %xmm4
9907 ; SSE41-NEXT: psrlw $4, %xmm0
9908 ; SSE41-NEXT: pand %xmm1, %xmm0
9909 ; SSE41-NEXT: pshufb %xmm0, %xmm3
9910 ; SSE41-NEXT: paddb %xmm4, %xmm3
9911 ; SSE41-NEXT: pxor %xmm0, %xmm0
9912 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
9913 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
9914 ; SSE41-NEXT: psadbw %xmm0, %xmm3
9915 ; SSE41-NEXT: psadbw %xmm0, %xmm1
9916 ; SSE41-NEXT: packuswb %xmm3, %xmm1
9917 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [13,13,13,13]
9918 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
9921 ; AVX1-LABEL: ult_13_v4i32:
9923 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9924 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
9925 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9926 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9927 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
9928 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
9929 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9930 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9931 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
9932 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9933 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9934 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9935 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9936 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9937 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [13,13,13,13]
9938 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9941 ; AVX2-LABEL: ult_13_v4i32:
9943 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
9944 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
9945 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
9946 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
9947 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
9948 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
9949 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
9950 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
9951 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
9952 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9953 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9954 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9955 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9956 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9957 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
9958 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9961 ; AVX512VPOPCNTDQ-LABEL: ult_13_v4i32:
9962 ; AVX512VPOPCNTDQ: # %bb.0:
9963 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9964 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
9965 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
9966 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9967 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
9968 ; AVX512VPOPCNTDQ-NEXT: retq
9970 ; AVX512VPOPCNTDQVL-LABEL: ult_13_v4i32:
9971 ; AVX512VPOPCNTDQVL: # %bb.0:
9972 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
9973 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
9974 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9975 ; AVX512VPOPCNTDQVL-NEXT: retq
9977 ; BITALG_NOVLX-LABEL: ult_13_v4i32:
9978 ; BITALG_NOVLX: # %bb.0:
9979 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
9980 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
9981 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
9982 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9983 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9984 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9985 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
9986 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
9987 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
9988 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
9989 ; BITALG_NOVLX-NEXT: vzeroupper
9990 ; BITALG_NOVLX-NEXT: retq
9992 ; BITALG-LABEL: ult_13_v4i32:
9994 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
9995 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
9996 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
9997 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
9998 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
9999 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10000 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10001 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
10002 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10003 ; BITALG-NEXT: retq
10004 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
10005 %3 = icmp ult <4 x i32> %2, <i32 13, i32 13, i32 13, i32 13>
10006 %4 = sext <4 x i1> %3 to <4 x i32>
; Test case: per-lane population count of a <4 x i32> vector, compared
; unsigned-greater-than against a splat of 13, with the <4 x i1> result
; sign-extended back to <4 x i32>.
; The per-configuration assembly expectations that follow are autogenerated
; (see the NOTE at the top of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
10010 define <4 x i32> @ugt_13_v4i32(<4 x i32> %0) {
10011 ; SSE2-LABEL: ugt_13_v4i32:
10013 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10014 ; SSE2-NEXT: psrlw $1, %xmm1
10015 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10016 ; SSE2-NEXT: psubb %xmm1, %xmm0
10017 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10018 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10019 ; SSE2-NEXT: pand %xmm1, %xmm2
10020 ; SSE2-NEXT: psrlw $2, %xmm0
10021 ; SSE2-NEXT: pand %xmm1, %xmm0
10022 ; SSE2-NEXT: paddb %xmm2, %xmm0
10023 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10024 ; SSE2-NEXT: psrlw $4, %xmm1
10025 ; SSE2-NEXT: paddb %xmm1, %xmm0
10026 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10027 ; SSE2-NEXT: pxor %xmm1, %xmm1
10028 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10029 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
10030 ; SSE2-NEXT: psadbw %xmm1, %xmm2
10031 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
10032 ; SSE2-NEXT: psadbw %xmm1, %xmm0
10033 ; SSE2-NEXT: packuswb %xmm2, %xmm0
10034 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10037 ; SSE3-LABEL: ugt_13_v4i32:
10039 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10040 ; SSE3-NEXT: psrlw $1, %xmm1
10041 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10042 ; SSE3-NEXT: psubb %xmm1, %xmm0
10043 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10044 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10045 ; SSE3-NEXT: pand %xmm1, %xmm2
10046 ; SSE3-NEXT: psrlw $2, %xmm0
10047 ; SSE3-NEXT: pand %xmm1, %xmm0
10048 ; SSE3-NEXT: paddb %xmm2, %xmm0
10049 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10050 ; SSE3-NEXT: psrlw $4, %xmm1
10051 ; SSE3-NEXT: paddb %xmm1, %xmm0
10052 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10053 ; SSE3-NEXT: pxor %xmm1, %xmm1
10054 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10055 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
10056 ; SSE3-NEXT: psadbw %xmm1, %xmm2
10057 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
10058 ; SSE3-NEXT: psadbw %xmm1, %xmm0
10059 ; SSE3-NEXT: packuswb %xmm2, %xmm0
10060 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10063 ; SSSE3-LABEL: ugt_13_v4i32:
10065 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10066 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
10067 ; SSSE3-NEXT: pand %xmm2, %xmm3
10068 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10069 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
10070 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
10071 ; SSSE3-NEXT: psrlw $4, %xmm0
10072 ; SSSE3-NEXT: pand %xmm2, %xmm0
10073 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
10074 ; SSSE3-NEXT: paddb %xmm4, %xmm1
10075 ; SSSE3-NEXT: pxor %xmm0, %xmm0
10076 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
10077 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10078 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
10079 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10080 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
10081 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
10082 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10083 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
10086 ; SSE41-LABEL: ugt_13_v4i32:
10088 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10089 ; SSE41-NEXT: movdqa %xmm0, %xmm2
10090 ; SSE41-NEXT: pand %xmm1, %xmm2
10091 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10092 ; SSE41-NEXT: movdqa %xmm3, %xmm4
10093 ; SSE41-NEXT: pshufb %xmm2, %xmm4
10094 ; SSE41-NEXT: psrlw $4, %xmm0
10095 ; SSE41-NEXT: pand %xmm1, %xmm0
10096 ; SSE41-NEXT: pshufb %xmm0, %xmm3
10097 ; SSE41-NEXT: paddb %xmm4, %xmm3
10098 ; SSE41-NEXT: pxor %xmm1, %xmm1
10099 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
10100 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
10101 ; SSE41-NEXT: psadbw %xmm1, %xmm3
10102 ; SSE41-NEXT: psadbw %xmm1, %xmm0
10103 ; SSE41-NEXT: packuswb %xmm3, %xmm0
10104 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10107 ; AVX1-LABEL: ugt_13_v4i32:
10109 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10110 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
10111 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10112 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10113 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
10114 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
10115 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10116 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10117 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
10118 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10119 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10120 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10121 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10122 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10123 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
10126 ; AVX2-LABEL: ugt_13_v4i32:
10128 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10129 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
10130 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10131 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10132 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
10133 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
10134 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10135 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10136 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
10137 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10138 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10139 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10140 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10141 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10142 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
10143 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10146 ; AVX512VPOPCNTDQ-LABEL: ugt_13_v4i32:
10147 ; AVX512VPOPCNTDQ: # %bb.0:
10148 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10149 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
10150 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
10151 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10152 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
10153 ; AVX512VPOPCNTDQ-NEXT: retq
10155 ; AVX512VPOPCNTDQVL-LABEL: ugt_13_v4i32:
10156 ; AVX512VPOPCNTDQVL: # %bb.0:
10157 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
10158 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
10159 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10160 ; AVX512VPOPCNTDQVL-NEXT: retq
10162 ; BITALG_NOVLX-LABEL: ugt_13_v4i32:
10163 ; BITALG_NOVLX: # %bb.0:
10164 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10165 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
10166 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
10167 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10168 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10169 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10170 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10171 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10172 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
10173 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10174 ; BITALG_NOVLX-NEXT: vzeroupper
10175 ; BITALG_NOVLX-NEXT: retq
10177 ; BITALG-LABEL: ugt_13_v4i32:
10179 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
10180 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
10181 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10182 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10183 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10184 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10185 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10186 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [13,13,13,13]
10187 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10188 ; BITALG-NEXT: retq
; IR under test: ctpop on every i32 lane, then an unsigned "greater than 13"
; compare; sext turns each true lane into all-ones and each false lane into 0.
10189 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
10190 %3 = icmp ugt <4 x i32> %2, <i32 13, i32 13, i32 13, i32 13>
10191 %4 = sext <4 x i1> %3 to <4 x i32>
; Test case: per-lane population count of a <4 x i32> vector, compared
; unsigned-less-than against a splat of 14, with the <4 x i1> result
; sign-extended back to <4 x i32>.
; The per-configuration assembly expectations that follow are autogenerated
; (see the NOTE at the top of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
10195 define <4 x i32> @ult_14_v4i32(<4 x i32> %0) {
10196 ; SSE2-LABEL: ult_14_v4i32:
10198 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10199 ; SSE2-NEXT: psrlw $1, %xmm1
10200 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10201 ; SSE2-NEXT: psubb %xmm1, %xmm0
10202 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10203 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10204 ; SSE2-NEXT: pand %xmm1, %xmm2
10205 ; SSE2-NEXT: psrlw $2, %xmm0
10206 ; SSE2-NEXT: pand %xmm1, %xmm0
10207 ; SSE2-NEXT: paddb %xmm2, %xmm0
10208 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10209 ; SSE2-NEXT: psrlw $4, %xmm1
10210 ; SSE2-NEXT: paddb %xmm0, %xmm1
10211 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10212 ; SSE2-NEXT: pxor %xmm0, %xmm0
10213 ; SSE2-NEXT: movdqa %xmm1, %xmm2
10214 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10215 ; SSE2-NEXT: psadbw %xmm0, %xmm2
10216 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10217 ; SSE2-NEXT: psadbw %xmm0, %xmm1
10218 ; SSE2-NEXT: packuswb %xmm2, %xmm1
10219 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14]
10220 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
10223 ; SSE3-LABEL: ult_14_v4i32:
10225 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10226 ; SSE3-NEXT: psrlw $1, %xmm1
10227 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10228 ; SSE3-NEXT: psubb %xmm1, %xmm0
10229 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10230 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10231 ; SSE3-NEXT: pand %xmm1, %xmm2
10232 ; SSE3-NEXT: psrlw $2, %xmm0
10233 ; SSE3-NEXT: pand %xmm1, %xmm0
10234 ; SSE3-NEXT: paddb %xmm2, %xmm0
10235 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10236 ; SSE3-NEXT: psrlw $4, %xmm1
10237 ; SSE3-NEXT: paddb %xmm0, %xmm1
10238 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10239 ; SSE3-NEXT: pxor %xmm0, %xmm0
10240 ; SSE3-NEXT: movdqa %xmm1, %xmm2
10241 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10242 ; SSE3-NEXT: psadbw %xmm0, %xmm2
10243 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10244 ; SSE3-NEXT: psadbw %xmm0, %xmm1
10245 ; SSE3-NEXT: packuswb %xmm2, %xmm1
10246 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14]
10247 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
10250 ; SSSE3-LABEL: ult_14_v4i32:
10252 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10253 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
10254 ; SSSE3-NEXT: pand %xmm1, %xmm2
10255 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10256 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
10257 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
10258 ; SSSE3-NEXT: psrlw $4, %xmm0
10259 ; SSSE3-NEXT: pand %xmm1, %xmm0
10260 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
10261 ; SSSE3-NEXT: paddb %xmm4, %xmm3
10262 ; SSSE3-NEXT: pxor %xmm0, %xmm0
10263 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
10264 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
10265 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
10266 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
10267 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
10268 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
10269 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14]
10270 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
10273 ; SSE41-LABEL: ult_14_v4i32:
10275 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10276 ; SSE41-NEXT: movdqa %xmm0, %xmm2
10277 ; SSE41-NEXT: pand %xmm1, %xmm2
10278 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10279 ; SSE41-NEXT: movdqa %xmm3, %xmm4
10280 ; SSE41-NEXT: pshufb %xmm2, %xmm4
10281 ; SSE41-NEXT: psrlw $4, %xmm0
10282 ; SSE41-NEXT: pand %xmm1, %xmm0
10283 ; SSE41-NEXT: pshufb %xmm0, %xmm3
10284 ; SSE41-NEXT: paddb %xmm4, %xmm3
10285 ; SSE41-NEXT: pxor %xmm0, %xmm0
10286 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
10287 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
10288 ; SSE41-NEXT: psadbw %xmm0, %xmm3
10289 ; SSE41-NEXT: psadbw %xmm0, %xmm1
10290 ; SSE41-NEXT: packuswb %xmm3, %xmm1
10291 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [14,14,14,14]
10292 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
10295 ; AVX1-LABEL: ult_14_v4i32:
10297 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10298 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
10299 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10300 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10301 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
10302 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
10303 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10304 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10305 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
10306 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10307 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10308 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10309 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10310 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10311 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [14,14,14,14]
10312 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10315 ; AVX2-LABEL: ult_14_v4i32:
10317 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10318 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
10319 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10320 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10321 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
10322 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
10323 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10324 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10325 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
10326 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10327 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10328 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10329 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10330 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10331 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10332 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10335 ; AVX512VPOPCNTDQ-LABEL: ult_14_v4i32:
10336 ; AVX512VPOPCNTDQ: # %bb.0:
10337 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10338 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
10339 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10340 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10341 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
10342 ; AVX512VPOPCNTDQ-NEXT: retq
10344 ; AVX512VPOPCNTDQVL-LABEL: ult_14_v4i32:
10345 ; AVX512VPOPCNTDQVL: # %bb.0:
10346 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
10347 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10348 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10349 ; AVX512VPOPCNTDQVL-NEXT: retq
10351 ; BITALG_NOVLX-LABEL: ult_14_v4i32:
10352 ; BITALG_NOVLX: # %bb.0:
10353 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10354 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
10355 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
10356 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10357 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10358 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10359 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10360 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10361 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10362 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10363 ; BITALG_NOVLX-NEXT: vzeroupper
10364 ; BITALG_NOVLX-NEXT: retq
10366 ; BITALG-LABEL: ult_14_v4i32:
10368 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
10369 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
10370 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10371 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10372 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10373 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10374 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10375 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10376 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10377 ; BITALG-NEXT: retq
; IR under test: ctpop on every i32 lane, then an unsigned "less than 14"
; compare; sext turns each true lane into all-ones and each false lane into 0.
10378 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
10379 %3 = icmp ult <4 x i32> %2, <i32 14, i32 14, i32 14, i32 14>
10380 %4 = sext <4 x i1> %3 to <4 x i32>
; Test case: per-lane population count of a <4 x i32> vector, compared
; unsigned-greater-than against a splat of 14, with the <4 x i1> result
; sign-extended back to <4 x i32>.
; The per-configuration assembly expectations that follow are autogenerated
; (see the NOTE at the top of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
10384 define <4 x i32> @ugt_14_v4i32(<4 x i32> %0) {
10385 ; SSE2-LABEL: ugt_14_v4i32:
10387 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10388 ; SSE2-NEXT: psrlw $1, %xmm1
10389 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10390 ; SSE2-NEXT: psubb %xmm1, %xmm0
10391 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10392 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10393 ; SSE2-NEXT: pand %xmm1, %xmm2
10394 ; SSE2-NEXT: psrlw $2, %xmm0
10395 ; SSE2-NEXT: pand %xmm1, %xmm0
10396 ; SSE2-NEXT: paddb %xmm2, %xmm0
10397 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10398 ; SSE2-NEXT: psrlw $4, %xmm1
10399 ; SSE2-NEXT: paddb %xmm1, %xmm0
10400 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10401 ; SSE2-NEXT: pxor %xmm1, %xmm1
10402 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10403 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
10404 ; SSE2-NEXT: psadbw %xmm1, %xmm2
10405 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
10406 ; SSE2-NEXT: psadbw %xmm1, %xmm0
10407 ; SSE2-NEXT: packuswb %xmm2, %xmm0
10408 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10411 ; SSE3-LABEL: ugt_14_v4i32:
10413 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10414 ; SSE3-NEXT: psrlw $1, %xmm1
10415 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10416 ; SSE3-NEXT: psubb %xmm1, %xmm0
10417 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10418 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10419 ; SSE3-NEXT: pand %xmm1, %xmm2
10420 ; SSE3-NEXT: psrlw $2, %xmm0
10421 ; SSE3-NEXT: pand %xmm1, %xmm0
10422 ; SSE3-NEXT: paddb %xmm2, %xmm0
10423 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10424 ; SSE3-NEXT: psrlw $4, %xmm1
10425 ; SSE3-NEXT: paddb %xmm1, %xmm0
10426 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10427 ; SSE3-NEXT: pxor %xmm1, %xmm1
10428 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10429 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
10430 ; SSE3-NEXT: psadbw %xmm1, %xmm2
10431 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
10432 ; SSE3-NEXT: psadbw %xmm1, %xmm0
10433 ; SSE3-NEXT: packuswb %xmm2, %xmm0
10434 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10437 ; SSSE3-LABEL: ugt_14_v4i32:
10439 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10440 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
10441 ; SSSE3-NEXT: pand %xmm2, %xmm3
10442 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10443 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
10444 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
10445 ; SSSE3-NEXT: psrlw $4, %xmm0
10446 ; SSSE3-NEXT: pand %xmm2, %xmm0
10447 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
10448 ; SSSE3-NEXT: paddb %xmm4, %xmm1
10449 ; SSSE3-NEXT: pxor %xmm0, %xmm0
10450 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
10451 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10452 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
10453 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10454 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
10455 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
10456 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10457 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
10460 ; SSE41-LABEL: ugt_14_v4i32:
10462 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10463 ; SSE41-NEXT: movdqa %xmm0, %xmm2
10464 ; SSE41-NEXT: pand %xmm1, %xmm2
10465 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10466 ; SSE41-NEXT: movdqa %xmm3, %xmm4
10467 ; SSE41-NEXT: pshufb %xmm2, %xmm4
10468 ; SSE41-NEXT: psrlw $4, %xmm0
10469 ; SSE41-NEXT: pand %xmm1, %xmm0
10470 ; SSE41-NEXT: pshufb %xmm0, %xmm3
10471 ; SSE41-NEXT: paddb %xmm4, %xmm3
10472 ; SSE41-NEXT: pxor %xmm1, %xmm1
10473 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
10474 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
10475 ; SSE41-NEXT: psadbw %xmm1, %xmm3
10476 ; SSE41-NEXT: psadbw %xmm1, %xmm0
10477 ; SSE41-NEXT: packuswb %xmm3, %xmm0
10478 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10481 ; AVX1-LABEL: ugt_14_v4i32:
10483 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10484 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
10485 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10486 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10487 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
10488 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
10489 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10490 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10491 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
10492 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10493 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10494 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10495 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10496 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10497 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
10500 ; AVX2-LABEL: ugt_14_v4i32:
10502 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10503 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
10504 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10505 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10506 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
10507 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
10508 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10509 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10510 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
10511 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10512 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10513 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10514 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10515 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10516 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10517 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10520 ; AVX512VPOPCNTDQ-LABEL: ugt_14_v4i32:
10521 ; AVX512VPOPCNTDQ: # %bb.0:
10522 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10523 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
10524 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10525 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10526 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
10527 ; AVX512VPOPCNTDQ-NEXT: retq
10529 ; AVX512VPOPCNTDQVL-LABEL: ugt_14_v4i32:
10530 ; AVX512VPOPCNTDQVL: # %bb.0:
10531 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
10532 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10533 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10534 ; AVX512VPOPCNTDQVL-NEXT: retq
10536 ; BITALG_NOVLX-LABEL: ugt_14_v4i32:
10537 ; BITALG_NOVLX: # %bb.0:
10538 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10539 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
10540 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
10541 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10542 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10543 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10544 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10545 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10546 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10547 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10548 ; BITALG_NOVLX-NEXT: vzeroupper
10549 ; BITALG_NOVLX-NEXT: retq
10551 ; BITALG-LABEL: ugt_14_v4i32:
10553 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
10554 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
10555 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10556 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10557 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10558 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10559 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10560 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [14,14,14,14]
10561 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10562 ; BITALG-NEXT: retq
; IR under test: ctpop on every i32 lane, then an unsigned "greater than 14"
; compare; sext turns each true lane into all-ones and each false lane into 0.
10563 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
10564 %3 = icmp ugt <4 x i32> %2, <i32 14, i32 14, i32 14, i32 14>
10565 %4 = sext <4 x i1> %3 to <4 x i32>
10569 define <4 x i32> @ult_15_v4i32(<4 x i32> %0) {
10570 ; SSE2-LABEL: ult_15_v4i32:
10572 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10573 ; SSE2-NEXT: psrlw $1, %xmm1
10574 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10575 ; SSE2-NEXT: psubb %xmm1, %xmm0
10576 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10577 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10578 ; SSE2-NEXT: pand %xmm1, %xmm2
10579 ; SSE2-NEXT: psrlw $2, %xmm0
10580 ; SSE2-NEXT: pand %xmm1, %xmm0
10581 ; SSE2-NEXT: paddb %xmm2, %xmm0
10582 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10583 ; SSE2-NEXT: psrlw $4, %xmm1
10584 ; SSE2-NEXT: paddb %xmm0, %xmm1
10585 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10586 ; SSE2-NEXT: pxor %xmm0, %xmm0
10587 ; SSE2-NEXT: movdqa %xmm1, %xmm2
10588 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10589 ; SSE2-NEXT: psadbw %xmm0, %xmm2
10590 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10591 ; SSE2-NEXT: psadbw %xmm0, %xmm1
10592 ; SSE2-NEXT: packuswb %xmm2, %xmm1
10593 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15]
10594 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
10597 ; SSE3-LABEL: ult_15_v4i32:
10599 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10600 ; SSE3-NEXT: psrlw $1, %xmm1
10601 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10602 ; SSE3-NEXT: psubb %xmm1, %xmm0
10603 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10604 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10605 ; SSE3-NEXT: pand %xmm1, %xmm2
10606 ; SSE3-NEXT: psrlw $2, %xmm0
10607 ; SSE3-NEXT: pand %xmm1, %xmm0
10608 ; SSE3-NEXT: paddb %xmm2, %xmm0
10609 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10610 ; SSE3-NEXT: psrlw $4, %xmm1
10611 ; SSE3-NEXT: paddb %xmm0, %xmm1
10612 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10613 ; SSE3-NEXT: pxor %xmm0, %xmm0
10614 ; SSE3-NEXT: movdqa %xmm1, %xmm2
10615 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10616 ; SSE3-NEXT: psadbw %xmm0, %xmm2
10617 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10618 ; SSE3-NEXT: psadbw %xmm0, %xmm1
10619 ; SSE3-NEXT: packuswb %xmm2, %xmm1
10620 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15]
10621 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
10624 ; SSSE3-LABEL: ult_15_v4i32:
10626 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10627 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
10628 ; SSSE3-NEXT: pand %xmm1, %xmm2
10629 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10630 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
10631 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
10632 ; SSSE3-NEXT: psrlw $4, %xmm0
10633 ; SSSE3-NEXT: pand %xmm1, %xmm0
10634 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
10635 ; SSSE3-NEXT: paddb %xmm4, %xmm3
10636 ; SSSE3-NEXT: pxor %xmm0, %xmm0
10637 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
10638 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
10639 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
10640 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
10641 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
10642 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
10643 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15]
10644 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
10647 ; SSE41-LABEL: ult_15_v4i32:
10649 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10650 ; SSE41-NEXT: movdqa %xmm0, %xmm2
10651 ; SSE41-NEXT: pand %xmm1, %xmm2
10652 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10653 ; SSE41-NEXT: movdqa %xmm3, %xmm4
10654 ; SSE41-NEXT: pshufb %xmm2, %xmm4
10655 ; SSE41-NEXT: psrlw $4, %xmm0
10656 ; SSE41-NEXT: pand %xmm1, %xmm0
10657 ; SSE41-NEXT: pshufb %xmm0, %xmm3
10658 ; SSE41-NEXT: paddb %xmm4, %xmm3
10659 ; SSE41-NEXT: pxor %xmm0, %xmm0
10660 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
10661 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
10662 ; SSE41-NEXT: psadbw %xmm0, %xmm3
10663 ; SSE41-NEXT: psadbw %xmm0, %xmm1
10664 ; SSE41-NEXT: packuswb %xmm3, %xmm1
10665 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [15,15,15,15]
10666 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
10669 ; AVX1-LABEL: ult_15_v4i32:
10671 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10672 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
10673 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10674 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10675 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
10676 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
10677 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10678 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10679 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
10680 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10681 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10682 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10683 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10684 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10685 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15]
10686 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10689 ; AVX2-LABEL: ult_15_v4i32:
10691 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10692 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
10693 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10694 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10695 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
10696 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
10697 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10698 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10699 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
10700 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10701 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10702 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10703 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10704 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10705 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10706 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10709 ; AVX512VPOPCNTDQ-LABEL: ult_15_v4i32:
10710 ; AVX512VPOPCNTDQ: # %bb.0:
10711 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10712 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
10713 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10714 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10715 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
10716 ; AVX512VPOPCNTDQ-NEXT: retq
10718 ; AVX512VPOPCNTDQVL-LABEL: ult_15_v4i32:
10719 ; AVX512VPOPCNTDQVL: # %bb.0:
10720 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
10721 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10722 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10723 ; AVX512VPOPCNTDQVL-NEXT: retq
10725 ; BITALG_NOVLX-LABEL: ult_15_v4i32:
10726 ; BITALG_NOVLX: # %bb.0:
10727 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10728 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
10729 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
10730 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10731 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10732 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10733 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10734 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10735 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10736 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10737 ; BITALG_NOVLX-NEXT: vzeroupper
10738 ; BITALG_NOVLX-NEXT: retq
10740 ; BITALG-LABEL: ult_15_v4i32:
10742 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
10743 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
10744 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10745 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10746 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10747 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10748 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10749 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10750 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10751 ; BITALG-NEXT: retq
10752 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
10753 %3 = icmp ult <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15>
10754 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_15_v4i32 - codegen test for
;   sext(icmp ugt (ctpop <4 x i32> %0), splat(i32 15)) to <4 x i32>.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the update script rather than editing them.
; What the expected output shows, per run configuration:
;  - SSE2 / SSE3 runs: popcount expanded with a shift-and-mask sequence
;    (psrlw/pand/psubb/paddb), then psadbw against a zeroed register.
;  - SSSE3 / SSE4.1 / AVX1 / AVX2 runs: per-nibble pshufb lookup into the
;    [0,1,1,2,...,4] table, then the same psadbw/packuswb reduction.
;  - VPOPCNTDQ runs: a single vpopcntd (on zmm0 when VL is unavailable, hence
;    the "kill" annotation and the trailing vzeroupper).
;  - BITALG runs: vpopcntb per byte, then the psadbw/packuswb reduction.
; In every configuration the final compare is a pcmpgtd of the counts against
; a splat of 15 (folded from the constant pool in the SSE and AVX1 runs).
10758 define <4 x i32> @ugt_15_v4i32(<4 x i32> %0) {
10759 ; SSE2-LABEL: ugt_15_v4i32:
10761 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10762 ; SSE2-NEXT: psrlw $1, %xmm1
10763 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10764 ; SSE2-NEXT: psubb %xmm1, %xmm0
10765 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10766 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10767 ; SSE2-NEXT: pand %xmm1, %xmm2
10768 ; SSE2-NEXT: psrlw $2, %xmm0
10769 ; SSE2-NEXT: pand %xmm1, %xmm0
10770 ; SSE2-NEXT: paddb %xmm2, %xmm0
10771 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10772 ; SSE2-NEXT: psrlw $4, %xmm1
10773 ; SSE2-NEXT: paddb %xmm1, %xmm0
10774 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10775 ; SSE2-NEXT: pxor %xmm1, %xmm1
10776 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10777 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
10778 ; SSE2-NEXT: psadbw %xmm1, %xmm2
10779 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
10780 ; SSE2-NEXT: psadbw %xmm1, %xmm0
10781 ; SSE2-NEXT: packuswb %xmm2, %xmm0
10782 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10785 ; SSE3-LABEL: ugt_15_v4i32:
10787 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10788 ; SSE3-NEXT: psrlw $1, %xmm1
10789 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10790 ; SSE3-NEXT: psubb %xmm1, %xmm0
10791 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10792 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10793 ; SSE3-NEXT: pand %xmm1, %xmm2
10794 ; SSE3-NEXT: psrlw $2, %xmm0
10795 ; SSE3-NEXT: pand %xmm1, %xmm0
10796 ; SSE3-NEXT: paddb %xmm2, %xmm0
10797 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10798 ; SSE3-NEXT: psrlw $4, %xmm1
10799 ; SSE3-NEXT: paddb %xmm1, %xmm0
10800 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10801 ; SSE3-NEXT: pxor %xmm1, %xmm1
10802 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10803 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
10804 ; SSE3-NEXT: psadbw %xmm1, %xmm2
10805 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
10806 ; SSE3-NEXT: psadbw %xmm1, %xmm0
10807 ; SSE3-NEXT: packuswb %xmm2, %xmm0
10808 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10811 ; SSSE3-LABEL: ugt_15_v4i32:
10813 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10814 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
10815 ; SSSE3-NEXT: pand %xmm2, %xmm3
10816 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10817 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
10818 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
10819 ; SSSE3-NEXT: psrlw $4, %xmm0
10820 ; SSSE3-NEXT: pand %xmm2, %xmm0
10821 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
10822 ; SSSE3-NEXT: paddb %xmm4, %xmm1
10823 ; SSSE3-NEXT: pxor %xmm0, %xmm0
10824 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
10825 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10826 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
10827 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10828 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
10829 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
10830 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10831 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
10834 ; SSE41-LABEL: ugt_15_v4i32:
10836 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10837 ; SSE41-NEXT: movdqa %xmm0, %xmm2
10838 ; SSE41-NEXT: pand %xmm1, %xmm2
10839 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10840 ; SSE41-NEXT: movdqa %xmm3, %xmm4
10841 ; SSE41-NEXT: pshufb %xmm2, %xmm4
10842 ; SSE41-NEXT: psrlw $4, %xmm0
10843 ; SSE41-NEXT: pand %xmm1, %xmm0
10844 ; SSE41-NEXT: pshufb %xmm0, %xmm3
10845 ; SSE41-NEXT: paddb %xmm4, %xmm3
10846 ; SSE41-NEXT: pxor %xmm1, %xmm1
10847 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
10848 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
10849 ; SSE41-NEXT: psadbw %xmm1, %xmm3
10850 ; SSE41-NEXT: psadbw %xmm1, %xmm0
10851 ; SSE41-NEXT: packuswb %xmm3, %xmm0
10852 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10855 ; AVX1-LABEL: ugt_15_v4i32:
10857 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10858 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
10859 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10860 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10861 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
10862 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
10863 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10864 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10865 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
10866 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10867 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10868 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10869 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10870 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10871 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
10874 ; AVX2-LABEL: ugt_15_v4i32:
10876 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
10877 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
10878 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
10879 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
10880 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
10881 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
10882 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
10883 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
10884 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
10885 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10886 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10887 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10888 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10889 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10890 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10891 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10894 ; AVX512VPOPCNTDQ-LABEL: ugt_15_v4i32:
10895 ; AVX512VPOPCNTDQ: # %bb.0:
10896 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10897 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
10898 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10899 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10900 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
10901 ; AVX512VPOPCNTDQ-NEXT: retq
10903 ; AVX512VPOPCNTDQVL-LABEL: ugt_15_v4i32:
10904 ; AVX512VPOPCNTDQVL: # %bb.0:
10905 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
10906 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10907 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10908 ; AVX512VPOPCNTDQVL-NEXT: retq
10910 ; BITALG_NOVLX-LABEL: ugt_15_v4i32:
10911 ; BITALG_NOVLX: # %bb.0:
10912 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
10913 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
10914 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
10915 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10916 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10917 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10918 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10919 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10920 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10921 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10922 ; BITALG_NOVLX-NEXT: vzeroupper
10923 ; BITALG_NOVLX-NEXT: retq
10925 ; BITALG-LABEL: ugt_15_v4i32:
10927 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
10928 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
10929 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
10930 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
10931 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
10932 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
10933 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
10934 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
10935 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
10936 ; BITALG-NEXT: retq
; IR under test: per-lane population count, unsigned greater-than compare
; against a splat of 15, then sign-extension of each i1 result to i32.
10937 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
10938 %3 = icmp ugt <4 x i32> %2, <i32 15, i32 15, i32 15, i32 15>
10939 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_16_v4i32 - codegen test for
;   sext(icmp ult (ctpop <4 x i32> %0), splat(i32 16)) to <4 x i32>.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the update script rather than editing them.
; What the expected output shows, per run configuration:
;  - SSE2 / SSE3 runs: popcount expanded with a shift-and-mask sequence
;    (psrlw/pand/psubb/paddb), then psadbw against a zeroed register.
;  - SSSE3 / SSE4.1 / AVX1 / AVX2 runs: per-nibble pshufb lookup into the
;    [0,1,1,2,...,4] table, then the same psadbw/packuswb reduction.
;  - VPOPCNTDQ runs: a single vpopcntd (on zmm0 when VL is unavailable, hence
;    the "kill" annotation and the trailing vzeroupper).
;  - BITALG runs: vpopcntb per byte, then the psadbw/packuswb reduction.
; In every configuration the "less than" is emitted as a pcmpgtd with the
; materialized splat of 16 as the greater-than side and the counts as the
; other operand.
10943 define <4 x i32> @ult_16_v4i32(<4 x i32> %0) {
10944 ; SSE2-LABEL: ult_16_v4i32:
10946 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10947 ; SSE2-NEXT: psrlw $1, %xmm1
10948 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10949 ; SSE2-NEXT: psubb %xmm1, %xmm0
10950 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10951 ; SSE2-NEXT: movdqa %xmm0, %xmm2
10952 ; SSE2-NEXT: pand %xmm1, %xmm2
10953 ; SSE2-NEXT: psrlw $2, %xmm0
10954 ; SSE2-NEXT: pand %xmm1, %xmm0
10955 ; SSE2-NEXT: paddb %xmm2, %xmm0
10956 ; SSE2-NEXT: movdqa %xmm0, %xmm1
10957 ; SSE2-NEXT: psrlw $4, %xmm1
10958 ; SSE2-NEXT: paddb %xmm0, %xmm1
10959 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10960 ; SSE2-NEXT: pxor %xmm0, %xmm0
10961 ; SSE2-NEXT: movdqa %xmm1, %xmm2
10962 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10963 ; SSE2-NEXT: psadbw %xmm0, %xmm2
10964 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10965 ; SSE2-NEXT: psadbw %xmm0, %xmm1
10966 ; SSE2-NEXT: packuswb %xmm2, %xmm1
10967 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16]
10968 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
10971 ; SSE3-LABEL: ult_16_v4i32:
10973 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10974 ; SSE3-NEXT: psrlw $1, %xmm1
10975 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10976 ; SSE3-NEXT: psubb %xmm1, %xmm0
10977 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
10978 ; SSE3-NEXT: movdqa %xmm0, %xmm2
10979 ; SSE3-NEXT: pand %xmm1, %xmm2
10980 ; SSE3-NEXT: psrlw $2, %xmm0
10981 ; SSE3-NEXT: pand %xmm1, %xmm0
10982 ; SSE3-NEXT: paddb %xmm2, %xmm0
10983 ; SSE3-NEXT: movdqa %xmm0, %xmm1
10984 ; SSE3-NEXT: psrlw $4, %xmm1
10985 ; SSE3-NEXT: paddb %xmm0, %xmm1
10986 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
10987 ; SSE3-NEXT: pxor %xmm0, %xmm0
10988 ; SSE3-NEXT: movdqa %xmm1, %xmm2
10989 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
10990 ; SSE3-NEXT: psadbw %xmm0, %xmm2
10991 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
10992 ; SSE3-NEXT: psadbw %xmm0, %xmm1
10993 ; SSE3-NEXT: packuswb %xmm2, %xmm1
10994 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16]
10995 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
10998 ; SSSE3-LABEL: ult_16_v4i32:
11000 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11001 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
11002 ; SSSE3-NEXT: pand %xmm1, %xmm2
11003 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11004 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
11005 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
11006 ; SSSE3-NEXT: psrlw $4, %xmm0
11007 ; SSSE3-NEXT: pand %xmm1, %xmm0
11008 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
11009 ; SSSE3-NEXT: paddb %xmm4, %xmm3
11010 ; SSSE3-NEXT: pxor %xmm0, %xmm0
11011 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
11012 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
11013 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
11014 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
11015 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
11016 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
11017 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16]
11018 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
11021 ; SSE41-LABEL: ult_16_v4i32:
11023 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11024 ; SSE41-NEXT: movdqa %xmm0, %xmm2
11025 ; SSE41-NEXT: pand %xmm1, %xmm2
11026 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11027 ; SSE41-NEXT: movdqa %xmm3, %xmm4
11028 ; SSE41-NEXT: pshufb %xmm2, %xmm4
11029 ; SSE41-NEXT: psrlw $4, %xmm0
11030 ; SSE41-NEXT: pand %xmm1, %xmm0
11031 ; SSE41-NEXT: pshufb %xmm0, %xmm3
11032 ; SSE41-NEXT: paddb %xmm4, %xmm3
11033 ; SSE41-NEXT: pxor %xmm0, %xmm0
11034 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
11035 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
11036 ; SSE41-NEXT: psadbw %xmm0, %xmm3
11037 ; SSE41-NEXT: psadbw %xmm0, %xmm1
11038 ; SSE41-NEXT: packuswb %xmm3, %xmm1
11039 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [16,16,16,16]
11040 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
11043 ; AVX1-LABEL: ult_16_v4i32:
11045 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11046 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
11047 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11048 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11049 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
11050 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
11051 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11052 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11053 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
11054 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11055 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11056 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11057 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11058 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11059 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [16,16,16,16]
11060 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11063 ; AVX2-LABEL: ult_16_v4i32:
11065 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11066 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
11067 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11068 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11069 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
11070 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
11071 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11072 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11073 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
11074 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11075 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11076 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11077 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11078 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11079 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11080 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11083 ; AVX512VPOPCNTDQ-LABEL: ult_16_v4i32:
11084 ; AVX512VPOPCNTDQ: # %bb.0:
11085 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11086 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
11087 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11088 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11089 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
11090 ; AVX512VPOPCNTDQ-NEXT: retq
11092 ; AVX512VPOPCNTDQVL-LABEL: ult_16_v4i32:
11093 ; AVX512VPOPCNTDQVL: # %bb.0:
11094 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
11095 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11096 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11097 ; AVX512VPOPCNTDQVL-NEXT: retq
11099 ; BITALG_NOVLX-LABEL: ult_16_v4i32:
11100 ; BITALG_NOVLX: # %bb.0:
11101 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11102 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
11103 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
11104 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11105 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11106 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11107 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11108 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11109 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11110 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11111 ; BITALG_NOVLX-NEXT: vzeroupper
11112 ; BITALG_NOVLX-NEXT: retq
11114 ; BITALG-LABEL: ult_16_v4i32:
11116 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
11117 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
11118 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11119 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11120 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11121 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11122 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11123 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11124 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11125 ; BITALG-NEXT: retq
; IR under test: per-lane population count, unsigned less-than compare
; against a splat of 16, then sign-extension of each i1 result to i32.
11126 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
11127 %3 = icmp ult <4 x i32> %2, <i32 16, i32 16, i32 16, i32 16>
11128 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_16_v4i32 - codegen test for
;   sext(icmp ugt (ctpop <4 x i32> %0), splat(i32 16)) to <4 x i32>.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the update script rather than editing them.
; What the expected output shows, per run configuration:
;  - SSE2 / SSE3 runs: popcount expanded with a shift-and-mask sequence
;    (psrlw/pand/psubb/paddb), then psadbw against a zeroed register.
;  - SSSE3 / SSE4.1 / AVX1 / AVX2 runs: per-nibble pshufb lookup into the
;    [0,1,1,2,...,4] table, then the same psadbw/packuswb reduction.
;  - VPOPCNTDQ runs: a single vpopcntd (on zmm0 when VL is unavailable, hence
;    the "kill" annotation and the trailing vzeroupper).
;  - BITALG runs: vpopcntb per byte, then the psadbw/packuswb reduction.
; In every configuration the final compare is a pcmpgtd of the counts against
; the threshold: a constant-pool memory operand in the SSE and AVX1 runs, a
; broadcast splat of 16 in the remaining runs.
11132 define <4 x i32> @ugt_16_v4i32(<4 x i32> %0) {
11133 ; SSE2-LABEL: ugt_16_v4i32:
11135 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11136 ; SSE2-NEXT: psrlw $1, %xmm1
11137 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11138 ; SSE2-NEXT: psubb %xmm1, %xmm0
11139 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11140 ; SSE2-NEXT: movdqa %xmm0, %xmm2
11141 ; SSE2-NEXT: pand %xmm1, %xmm2
11142 ; SSE2-NEXT: psrlw $2, %xmm0
11143 ; SSE2-NEXT: pand %xmm1, %xmm0
11144 ; SSE2-NEXT: paddb %xmm2, %xmm0
11145 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11146 ; SSE2-NEXT: psrlw $4, %xmm1
11147 ; SSE2-NEXT: paddb %xmm1, %xmm0
11148 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11149 ; SSE2-NEXT: pxor %xmm1, %xmm1
11150 ; SSE2-NEXT: movdqa %xmm0, %xmm2
11151 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
11152 ; SSE2-NEXT: psadbw %xmm1, %xmm2
11153 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
11154 ; SSE2-NEXT: psadbw %xmm1, %xmm0
11155 ; SSE2-NEXT: packuswb %xmm2, %xmm0
11156 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11159 ; SSE3-LABEL: ugt_16_v4i32:
11161 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11162 ; SSE3-NEXT: psrlw $1, %xmm1
11163 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11164 ; SSE3-NEXT: psubb %xmm1, %xmm0
11165 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11166 ; SSE3-NEXT: movdqa %xmm0, %xmm2
11167 ; SSE3-NEXT: pand %xmm1, %xmm2
11168 ; SSE3-NEXT: psrlw $2, %xmm0
11169 ; SSE3-NEXT: pand %xmm1, %xmm0
11170 ; SSE3-NEXT: paddb %xmm2, %xmm0
11171 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11172 ; SSE3-NEXT: psrlw $4, %xmm1
11173 ; SSE3-NEXT: paddb %xmm1, %xmm0
11174 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11175 ; SSE3-NEXT: pxor %xmm1, %xmm1
11176 ; SSE3-NEXT: movdqa %xmm0, %xmm2
11177 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
11178 ; SSE3-NEXT: psadbw %xmm1, %xmm2
11179 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
11180 ; SSE3-NEXT: psadbw %xmm1, %xmm0
11181 ; SSE3-NEXT: packuswb %xmm2, %xmm0
11182 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11185 ; SSSE3-LABEL: ugt_16_v4i32:
11187 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11188 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
11189 ; SSSE3-NEXT: pand %xmm2, %xmm3
11190 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11191 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
11192 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
11193 ; SSSE3-NEXT: psrlw $4, %xmm0
11194 ; SSSE3-NEXT: pand %xmm2, %xmm0
11195 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
11196 ; SSSE3-NEXT: paddb %xmm4, %xmm1
11197 ; SSSE3-NEXT: pxor %xmm0, %xmm0
11198 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
11199 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11200 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
11201 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11202 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
11203 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
11204 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11205 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
11208 ; SSE41-LABEL: ugt_16_v4i32:
11210 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11211 ; SSE41-NEXT: movdqa %xmm0, %xmm2
11212 ; SSE41-NEXT: pand %xmm1, %xmm2
11213 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11214 ; SSE41-NEXT: movdqa %xmm3, %xmm4
11215 ; SSE41-NEXT: pshufb %xmm2, %xmm4
11216 ; SSE41-NEXT: psrlw $4, %xmm0
11217 ; SSE41-NEXT: pand %xmm1, %xmm0
11218 ; SSE41-NEXT: pshufb %xmm0, %xmm3
11219 ; SSE41-NEXT: paddb %xmm4, %xmm3
11220 ; SSE41-NEXT: pxor %xmm1, %xmm1
11221 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
11222 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
11223 ; SSE41-NEXT: psadbw %xmm1, %xmm3
11224 ; SSE41-NEXT: psadbw %xmm1, %xmm0
11225 ; SSE41-NEXT: packuswb %xmm3, %xmm0
11226 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11229 ; AVX1-LABEL: ugt_16_v4i32:
11231 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11232 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
11233 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11234 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11235 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
11236 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
11237 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11238 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11239 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
11240 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11241 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11242 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11243 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11244 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11245 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
11248 ; AVX2-LABEL: ugt_16_v4i32:
11250 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11251 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
11252 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11253 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11254 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
11255 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
11256 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11257 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11258 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
11259 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11260 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11261 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11262 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11263 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11264 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11265 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11268 ; AVX512VPOPCNTDQ-LABEL: ugt_16_v4i32:
11269 ; AVX512VPOPCNTDQ: # %bb.0:
11270 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11271 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
11272 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11273 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11274 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
11275 ; AVX512VPOPCNTDQ-NEXT: retq
11277 ; AVX512VPOPCNTDQVL-LABEL: ugt_16_v4i32:
11278 ; AVX512VPOPCNTDQVL: # %bb.0:
11279 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
11280 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11281 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11282 ; AVX512VPOPCNTDQVL-NEXT: retq
11284 ; BITALG_NOVLX-LABEL: ugt_16_v4i32:
11285 ; BITALG_NOVLX: # %bb.0:
11286 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11287 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
11288 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
11289 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11290 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11291 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11292 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11293 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11294 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11295 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11296 ; BITALG_NOVLX-NEXT: vzeroupper
11297 ; BITALG_NOVLX-NEXT: retq
11299 ; BITALG-LABEL: ugt_16_v4i32:
11301 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
11302 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
11303 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11304 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11305 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11306 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11307 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11308 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [16,16,16,16]
11309 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11310 ; BITALG-NEXT: retq
; IR under test: per-lane population count, unsigned greater-than compare
; against a splat of 16, then sign-extension of each i1 result to i32.
11311 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
11312 %3 = icmp ugt <4 x i32> %2, <i32 16, i32 16, i32 16, i32 16>
11313 %4 = sext <4 x i1> %3 to <4 x i32>
11317 define <4 x i32> @ult_17_v4i32(<4 x i32> %0) {
11318 ; SSE2-LABEL: ult_17_v4i32:
11320 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11321 ; SSE2-NEXT: psrlw $1, %xmm1
11322 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11323 ; SSE2-NEXT: psubb %xmm1, %xmm0
11324 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11325 ; SSE2-NEXT: movdqa %xmm0, %xmm2
11326 ; SSE2-NEXT: pand %xmm1, %xmm2
11327 ; SSE2-NEXT: psrlw $2, %xmm0
11328 ; SSE2-NEXT: pand %xmm1, %xmm0
11329 ; SSE2-NEXT: paddb %xmm2, %xmm0
11330 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11331 ; SSE2-NEXT: psrlw $4, %xmm1
11332 ; SSE2-NEXT: paddb %xmm0, %xmm1
11333 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11334 ; SSE2-NEXT: pxor %xmm0, %xmm0
11335 ; SSE2-NEXT: movdqa %xmm1, %xmm2
11336 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11337 ; SSE2-NEXT: psadbw %xmm0, %xmm2
11338 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11339 ; SSE2-NEXT: psadbw %xmm0, %xmm1
11340 ; SSE2-NEXT: packuswb %xmm2, %xmm1
11341 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17]
11342 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
11345 ; SSE3-LABEL: ult_17_v4i32:
11347 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11348 ; SSE3-NEXT: psrlw $1, %xmm1
11349 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11350 ; SSE3-NEXT: psubb %xmm1, %xmm0
11351 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11352 ; SSE3-NEXT: movdqa %xmm0, %xmm2
11353 ; SSE3-NEXT: pand %xmm1, %xmm2
11354 ; SSE3-NEXT: psrlw $2, %xmm0
11355 ; SSE3-NEXT: pand %xmm1, %xmm0
11356 ; SSE3-NEXT: paddb %xmm2, %xmm0
11357 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11358 ; SSE3-NEXT: psrlw $4, %xmm1
11359 ; SSE3-NEXT: paddb %xmm0, %xmm1
11360 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11361 ; SSE3-NEXT: pxor %xmm0, %xmm0
11362 ; SSE3-NEXT: movdqa %xmm1, %xmm2
11363 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11364 ; SSE3-NEXT: psadbw %xmm0, %xmm2
11365 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11366 ; SSE3-NEXT: psadbw %xmm0, %xmm1
11367 ; SSE3-NEXT: packuswb %xmm2, %xmm1
11368 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17]
11369 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
11372 ; SSSE3-LABEL: ult_17_v4i32:
11374 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11375 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
11376 ; SSSE3-NEXT: pand %xmm1, %xmm2
11377 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11378 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
11379 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
11380 ; SSSE3-NEXT: psrlw $4, %xmm0
11381 ; SSSE3-NEXT: pand %xmm1, %xmm0
11382 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
11383 ; SSSE3-NEXT: paddb %xmm4, %xmm3
11384 ; SSSE3-NEXT: pxor %xmm0, %xmm0
11385 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
11386 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
11387 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
11388 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
11389 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
11390 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
11391 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17]
11392 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
11395 ; SSE41-LABEL: ult_17_v4i32:
11397 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11398 ; SSE41-NEXT: movdqa %xmm0, %xmm2
11399 ; SSE41-NEXT: pand %xmm1, %xmm2
11400 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11401 ; SSE41-NEXT: movdqa %xmm3, %xmm4
11402 ; SSE41-NEXT: pshufb %xmm2, %xmm4
11403 ; SSE41-NEXT: psrlw $4, %xmm0
11404 ; SSE41-NEXT: pand %xmm1, %xmm0
11405 ; SSE41-NEXT: pshufb %xmm0, %xmm3
11406 ; SSE41-NEXT: paddb %xmm4, %xmm3
11407 ; SSE41-NEXT: pxor %xmm0, %xmm0
11408 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
11409 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
11410 ; SSE41-NEXT: psadbw %xmm0, %xmm3
11411 ; SSE41-NEXT: psadbw %xmm0, %xmm1
11412 ; SSE41-NEXT: packuswb %xmm3, %xmm1
11413 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [17,17,17,17]
11414 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
11417 ; AVX1-LABEL: ult_17_v4i32:
11419 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11420 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
11421 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11422 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11423 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
11424 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
11425 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11426 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11427 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
11428 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11429 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11430 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11431 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11432 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11433 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [17,17,17,17]
11434 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11437 ; AVX2-LABEL: ult_17_v4i32:
11439 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11440 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
11441 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11442 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11443 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
11444 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
11445 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11446 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11447 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
11448 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11449 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11450 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11451 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11452 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11453 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11454 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11457 ; AVX512VPOPCNTDQ-LABEL: ult_17_v4i32:
11458 ; AVX512VPOPCNTDQ: # %bb.0:
11459 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11460 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
11461 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11462 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11463 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
11464 ; AVX512VPOPCNTDQ-NEXT: retq
11466 ; AVX512VPOPCNTDQVL-LABEL: ult_17_v4i32:
11467 ; AVX512VPOPCNTDQVL: # %bb.0:
11468 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
11469 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11470 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11471 ; AVX512VPOPCNTDQVL-NEXT: retq
11473 ; BITALG_NOVLX-LABEL: ult_17_v4i32:
11474 ; BITALG_NOVLX: # %bb.0:
11475 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11476 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
11477 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
11478 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11479 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11480 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11481 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11482 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11483 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11484 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11485 ; BITALG_NOVLX-NEXT: vzeroupper
11486 ; BITALG_NOVLX-NEXT: retq
11488 ; BITALG-LABEL: ult_17_v4i32:
11490 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
11491 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
11492 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11493 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11494 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11495 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11496 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11497 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11498 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11499 ; BITALG-NEXT: retq
11500 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
11501 %3 = icmp ult <4 x i32> %2, <i32 17, i32 17, i32 17, i32 17>
11502 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_17_v4i32: takes the per-lane population count of a <4 x i32> input via
; llvm.ctpop.v4i32, compares each lane unsigned-greater-than a splat of 17, and
; sign-extends the <4 x i1> compare result to <4 x i32>.
; The assertion lines below are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
11506 define <4 x i32> @ugt_17_v4i32(<4 x i32> %0) {
11507 ; SSE2-LABEL: ugt_17_v4i32:
11509 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11510 ; SSE2-NEXT: psrlw $1, %xmm1
11511 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11512 ; SSE2-NEXT: psubb %xmm1, %xmm0
11513 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11514 ; SSE2-NEXT: movdqa %xmm0, %xmm2
11515 ; SSE2-NEXT: pand %xmm1, %xmm2
11516 ; SSE2-NEXT: psrlw $2, %xmm0
11517 ; SSE2-NEXT: pand %xmm1, %xmm0
11518 ; SSE2-NEXT: paddb %xmm2, %xmm0
11519 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11520 ; SSE2-NEXT: psrlw $4, %xmm1
11521 ; SSE2-NEXT: paddb %xmm1, %xmm0
11522 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11523 ; SSE2-NEXT: pxor %xmm1, %xmm1
11524 ; SSE2-NEXT: movdqa %xmm0, %xmm2
11525 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
11526 ; SSE2-NEXT: psadbw %xmm1, %xmm2
11527 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
11528 ; SSE2-NEXT: psadbw %xmm1, %xmm0
11529 ; SSE2-NEXT: packuswb %xmm2, %xmm0
11530 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11533 ; SSE3-LABEL: ugt_17_v4i32:
11535 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11536 ; SSE3-NEXT: psrlw $1, %xmm1
11537 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11538 ; SSE3-NEXT: psubb %xmm1, %xmm0
11539 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11540 ; SSE3-NEXT: movdqa %xmm0, %xmm2
11541 ; SSE3-NEXT: pand %xmm1, %xmm2
11542 ; SSE3-NEXT: psrlw $2, %xmm0
11543 ; SSE3-NEXT: pand %xmm1, %xmm0
11544 ; SSE3-NEXT: paddb %xmm2, %xmm0
11545 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11546 ; SSE3-NEXT: psrlw $4, %xmm1
11547 ; SSE3-NEXT: paddb %xmm1, %xmm0
11548 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11549 ; SSE3-NEXT: pxor %xmm1, %xmm1
11550 ; SSE3-NEXT: movdqa %xmm0, %xmm2
11551 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
11552 ; SSE3-NEXT: psadbw %xmm1, %xmm2
11553 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
11554 ; SSE3-NEXT: psadbw %xmm1, %xmm0
11555 ; SSE3-NEXT: packuswb %xmm2, %xmm0
11556 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11559 ; SSSE3-LABEL: ugt_17_v4i32:
11561 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11562 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
11563 ; SSSE3-NEXT: pand %xmm2, %xmm3
11564 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11565 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
11566 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
11567 ; SSSE3-NEXT: psrlw $4, %xmm0
11568 ; SSSE3-NEXT: pand %xmm2, %xmm0
11569 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
11570 ; SSSE3-NEXT: paddb %xmm4, %xmm1
11571 ; SSSE3-NEXT: pxor %xmm0, %xmm0
11572 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
11573 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11574 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
11575 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11576 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
11577 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
11578 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11579 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
11582 ; SSE41-LABEL: ugt_17_v4i32:
11584 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11585 ; SSE41-NEXT: movdqa %xmm0, %xmm2
11586 ; SSE41-NEXT: pand %xmm1, %xmm2
11587 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11588 ; SSE41-NEXT: movdqa %xmm3, %xmm4
11589 ; SSE41-NEXT: pshufb %xmm2, %xmm4
11590 ; SSE41-NEXT: psrlw $4, %xmm0
11591 ; SSE41-NEXT: pand %xmm1, %xmm0
11592 ; SSE41-NEXT: pshufb %xmm0, %xmm3
11593 ; SSE41-NEXT: paddb %xmm4, %xmm3
11594 ; SSE41-NEXT: pxor %xmm1, %xmm1
11595 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
11596 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
11597 ; SSE41-NEXT: psadbw %xmm1, %xmm3
11598 ; SSE41-NEXT: psadbw %xmm1, %xmm0
11599 ; SSE41-NEXT: packuswb %xmm3, %xmm0
11600 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11603 ; AVX1-LABEL: ugt_17_v4i32:
11605 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11606 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
11607 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11608 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11609 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
11610 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
11611 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11612 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11613 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
11614 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11615 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11616 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11617 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11618 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11619 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
11622 ; AVX2-LABEL: ugt_17_v4i32:
11624 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11625 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
11626 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11627 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11628 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
11629 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
11630 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11631 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11632 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
11633 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11634 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11635 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11636 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11637 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11638 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11639 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11642 ; AVX512VPOPCNTDQ-LABEL: ugt_17_v4i32:
11643 ; AVX512VPOPCNTDQ: # %bb.0:
11644 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11645 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
11646 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11647 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11648 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
11649 ; AVX512VPOPCNTDQ-NEXT: retq
11651 ; AVX512VPOPCNTDQVL-LABEL: ugt_17_v4i32:
11652 ; AVX512VPOPCNTDQVL: # %bb.0:
11653 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
11654 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11655 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11656 ; AVX512VPOPCNTDQVL-NEXT: retq
11658 ; BITALG_NOVLX-LABEL: ugt_17_v4i32:
11659 ; BITALG_NOVLX: # %bb.0:
11660 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11661 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
11662 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
11663 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11664 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11665 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11666 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11667 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11668 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11669 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11670 ; BITALG_NOVLX-NEXT: vzeroupper
11671 ; BITALG_NOVLX-NEXT: retq
11673 ; BITALG-LABEL: ugt_17_v4i32:
11675 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
11676 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
11677 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11678 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11679 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11680 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11681 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11682 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [17,17,17,17]
11683 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
11684 ; BITALG-NEXT: retq
; IR under test: ctpop on the input, icmp ugt against splat 17, then sext of the mask.
11685 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
11686 %3 = icmp ugt <4 x i32> %2, <i32 17, i32 17, i32 17, i32 17>
11687 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_18_v4i32: takes the per-lane population count of a <4 x i32> input via
; llvm.ctpop.v4i32, compares each lane unsigned-less-than a splat of 18, and
; sign-extends the <4 x i1> compare result to <4 x i32>.
; The assertion lines below are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
11691 define <4 x i32> @ult_18_v4i32(<4 x i32> %0) {
11692 ; SSE2-LABEL: ult_18_v4i32:
11694 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11695 ; SSE2-NEXT: psrlw $1, %xmm1
11696 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11697 ; SSE2-NEXT: psubb %xmm1, %xmm0
11698 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11699 ; SSE2-NEXT: movdqa %xmm0, %xmm2
11700 ; SSE2-NEXT: pand %xmm1, %xmm2
11701 ; SSE2-NEXT: psrlw $2, %xmm0
11702 ; SSE2-NEXT: pand %xmm1, %xmm0
11703 ; SSE2-NEXT: paddb %xmm2, %xmm0
11704 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11705 ; SSE2-NEXT: psrlw $4, %xmm1
11706 ; SSE2-NEXT: paddb %xmm0, %xmm1
11707 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11708 ; SSE2-NEXT: pxor %xmm0, %xmm0
11709 ; SSE2-NEXT: movdqa %xmm1, %xmm2
11710 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11711 ; SSE2-NEXT: psadbw %xmm0, %xmm2
11712 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11713 ; SSE2-NEXT: psadbw %xmm0, %xmm1
11714 ; SSE2-NEXT: packuswb %xmm2, %xmm1
11715 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18]
11716 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
11719 ; SSE3-LABEL: ult_18_v4i32:
11721 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11722 ; SSE3-NEXT: psrlw $1, %xmm1
11723 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11724 ; SSE3-NEXT: psubb %xmm1, %xmm0
11725 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11726 ; SSE3-NEXT: movdqa %xmm0, %xmm2
11727 ; SSE3-NEXT: pand %xmm1, %xmm2
11728 ; SSE3-NEXT: psrlw $2, %xmm0
11729 ; SSE3-NEXT: pand %xmm1, %xmm0
11730 ; SSE3-NEXT: paddb %xmm2, %xmm0
11731 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11732 ; SSE3-NEXT: psrlw $4, %xmm1
11733 ; SSE3-NEXT: paddb %xmm0, %xmm1
11734 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11735 ; SSE3-NEXT: pxor %xmm0, %xmm0
11736 ; SSE3-NEXT: movdqa %xmm1, %xmm2
11737 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11738 ; SSE3-NEXT: psadbw %xmm0, %xmm2
11739 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11740 ; SSE3-NEXT: psadbw %xmm0, %xmm1
11741 ; SSE3-NEXT: packuswb %xmm2, %xmm1
11742 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18]
11743 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
11746 ; SSSE3-LABEL: ult_18_v4i32:
11748 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11749 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
11750 ; SSSE3-NEXT: pand %xmm1, %xmm2
11751 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11752 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
11753 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
11754 ; SSSE3-NEXT: psrlw $4, %xmm0
11755 ; SSSE3-NEXT: pand %xmm1, %xmm0
11756 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
11757 ; SSSE3-NEXT: paddb %xmm4, %xmm3
11758 ; SSSE3-NEXT: pxor %xmm0, %xmm0
11759 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
11760 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
11761 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
11762 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
11763 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
11764 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
11765 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18]
11766 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
11769 ; SSE41-LABEL: ult_18_v4i32:
11771 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11772 ; SSE41-NEXT: movdqa %xmm0, %xmm2
11773 ; SSE41-NEXT: pand %xmm1, %xmm2
11774 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11775 ; SSE41-NEXT: movdqa %xmm3, %xmm4
11776 ; SSE41-NEXT: pshufb %xmm2, %xmm4
11777 ; SSE41-NEXT: psrlw $4, %xmm0
11778 ; SSE41-NEXT: pand %xmm1, %xmm0
11779 ; SSE41-NEXT: pshufb %xmm0, %xmm3
11780 ; SSE41-NEXT: paddb %xmm4, %xmm3
11781 ; SSE41-NEXT: pxor %xmm0, %xmm0
11782 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
11783 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
11784 ; SSE41-NEXT: psadbw %xmm0, %xmm3
11785 ; SSE41-NEXT: psadbw %xmm0, %xmm1
11786 ; SSE41-NEXT: packuswb %xmm3, %xmm1
11787 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [18,18,18,18]
11788 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
11791 ; AVX1-LABEL: ult_18_v4i32:
11793 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11794 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
11795 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11796 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11797 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
11798 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
11799 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11800 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11801 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
11802 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11803 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11804 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11805 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11806 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11807 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [18,18,18,18]
11808 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11811 ; AVX2-LABEL: ult_18_v4i32:
11813 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11814 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
11815 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11816 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11817 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
11818 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
11819 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11820 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11821 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
11822 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11823 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11824 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11825 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11826 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11827 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
11828 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11831 ; AVX512VPOPCNTDQ-LABEL: ult_18_v4i32:
11832 ; AVX512VPOPCNTDQ: # %bb.0:
11833 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11834 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
11835 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
11836 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11837 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
11838 ; AVX512VPOPCNTDQ-NEXT: retq
11840 ; AVX512VPOPCNTDQVL-LABEL: ult_18_v4i32:
11841 ; AVX512VPOPCNTDQVL: # %bb.0:
11842 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
11843 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
11844 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11845 ; AVX512VPOPCNTDQVL-NEXT: retq
11847 ; BITALG_NOVLX-LABEL: ult_18_v4i32:
11848 ; BITALG_NOVLX: # %bb.0:
11849 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
11850 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
11851 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
11852 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11853 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11854 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11855 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11856 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11857 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
11858 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11859 ; BITALG_NOVLX-NEXT: vzeroupper
11860 ; BITALG_NOVLX-NEXT: retq
11862 ; BITALG-LABEL: ult_18_v4i32:
11864 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
11865 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
11866 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11867 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11868 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11869 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11870 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11871 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
11872 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
11873 ; BITALG-NEXT: retq
; IR under test: ctpop on the input, icmp ult against splat 18, then sext of the mask.
11874 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
11875 %3 = icmp ult <4 x i32> %2, <i32 18, i32 18, i32 18, i32 18>
11876 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_18_v4i32: takes the per-lane population count of a <4 x i32> input via
; llvm.ctpop.v4i32, compares each lane unsigned-greater-than a splat of 18, and
; sign-extends the <4 x i1> compare result to <4 x i32>.
; The assertion lines below are autogenerated (see the NOTE on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
11880 define <4 x i32> @ugt_18_v4i32(<4 x i32> %0) {
11881 ; SSE2-LABEL: ugt_18_v4i32:
11883 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11884 ; SSE2-NEXT: psrlw $1, %xmm1
11885 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11886 ; SSE2-NEXT: psubb %xmm1, %xmm0
11887 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11888 ; SSE2-NEXT: movdqa %xmm0, %xmm2
11889 ; SSE2-NEXT: pand %xmm1, %xmm2
11890 ; SSE2-NEXT: psrlw $2, %xmm0
11891 ; SSE2-NEXT: pand %xmm1, %xmm0
11892 ; SSE2-NEXT: paddb %xmm2, %xmm0
11893 ; SSE2-NEXT: movdqa %xmm0, %xmm1
11894 ; SSE2-NEXT: psrlw $4, %xmm1
11895 ; SSE2-NEXT: paddb %xmm1, %xmm0
11896 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11897 ; SSE2-NEXT: pxor %xmm1, %xmm1
11898 ; SSE2-NEXT: movdqa %xmm0, %xmm2
11899 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
11900 ; SSE2-NEXT: psadbw %xmm1, %xmm2
11901 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
11902 ; SSE2-NEXT: psadbw %xmm1, %xmm0
11903 ; SSE2-NEXT: packuswb %xmm2, %xmm0
11904 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11907 ; SSE3-LABEL: ugt_18_v4i32:
11909 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11910 ; SSE3-NEXT: psrlw $1, %xmm1
11911 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11912 ; SSE3-NEXT: psubb %xmm1, %xmm0
11913 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
11914 ; SSE3-NEXT: movdqa %xmm0, %xmm2
11915 ; SSE3-NEXT: pand %xmm1, %xmm2
11916 ; SSE3-NEXT: psrlw $2, %xmm0
11917 ; SSE3-NEXT: pand %xmm1, %xmm0
11918 ; SSE3-NEXT: paddb %xmm2, %xmm0
11919 ; SSE3-NEXT: movdqa %xmm0, %xmm1
11920 ; SSE3-NEXT: psrlw $4, %xmm1
11921 ; SSE3-NEXT: paddb %xmm1, %xmm0
11922 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11923 ; SSE3-NEXT: pxor %xmm1, %xmm1
11924 ; SSE3-NEXT: movdqa %xmm0, %xmm2
11925 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
11926 ; SSE3-NEXT: psadbw %xmm1, %xmm2
11927 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
11928 ; SSE3-NEXT: psadbw %xmm1, %xmm0
11929 ; SSE3-NEXT: packuswb %xmm2, %xmm0
11930 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11933 ; SSSE3-LABEL: ugt_18_v4i32:
11935 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11936 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
11937 ; SSSE3-NEXT: pand %xmm2, %xmm3
11938 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11939 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
11940 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
11941 ; SSSE3-NEXT: psrlw $4, %xmm0
11942 ; SSSE3-NEXT: pand %xmm2, %xmm0
11943 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
11944 ; SSSE3-NEXT: paddb %xmm4, %xmm1
11945 ; SSSE3-NEXT: pxor %xmm0, %xmm0
11946 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
11947 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
11948 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
11949 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
11950 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
11951 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
11952 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
11953 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
11956 ; SSE41-LABEL: ugt_18_v4i32:
11958 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11959 ; SSE41-NEXT: movdqa %xmm0, %xmm2
11960 ; SSE41-NEXT: pand %xmm1, %xmm2
11961 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11962 ; SSE41-NEXT: movdqa %xmm3, %xmm4
11963 ; SSE41-NEXT: pshufb %xmm2, %xmm4
11964 ; SSE41-NEXT: psrlw $4, %xmm0
11965 ; SSE41-NEXT: pand %xmm1, %xmm0
11966 ; SSE41-NEXT: pshufb %xmm0, %xmm3
11967 ; SSE41-NEXT: paddb %xmm4, %xmm3
11968 ; SSE41-NEXT: pxor %xmm1, %xmm1
11969 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
11970 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
11971 ; SSE41-NEXT: psadbw %xmm1, %xmm3
11972 ; SSE41-NEXT: psadbw %xmm1, %xmm0
11973 ; SSE41-NEXT: packuswb %xmm3, %xmm0
11974 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
11977 ; AVX1-LABEL: ugt_18_v4i32:
11979 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11980 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
11981 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
11982 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
11983 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
11984 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
11985 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
11986 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
11987 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
11988 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
11989 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
11990 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
11991 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
11992 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
11993 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
11996 ; AVX2-LABEL: ugt_18_v4i32:
11998 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
11999 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
12000 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12001 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12002 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
12003 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
12004 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12005 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12006 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
12007 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12008 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12009 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12010 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12011 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12012 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
12013 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12016 ; AVX512VPOPCNTDQ-LABEL: ugt_18_v4i32:
12017 ; AVX512VPOPCNTDQ: # %bb.0:
12018 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12019 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
12020 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
12021 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12022 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
12023 ; AVX512VPOPCNTDQ-NEXT: retq
12025 ; AVX512VPOPCNTDQVL-LABEL: ugt_18_v4i32:
12026 ; AVX512VPOPCNTDQVL: # %bb.0:
12027 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
12028 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
12029 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12030 ; AVX512VPOPCNTDQVL-NEXT: retq
12032 ; BITALG_NOVLX-LABEL: ugt_18_v4i32:
12033 ; BITALG_NOVLX: # %bb.0:
12034 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12035 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
12036 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
12037 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12038 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12039 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12040 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12041 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12042 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
12043 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12044 ; BITALG_NOVLX-NEXT: vzeroupper
12045 ; BITALG_NOVLX-NEXT: retq
12047 ; BITALG-LABEL: ugt_18_v4i32:
12049 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
12050 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
12051 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12052 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12053 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12054 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12055 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12056 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [18,18,18,18]
12057 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12058 ; BITALG-NEXT: retq
; IR under test: ctpop on the input, icmp ugt against splat 18, then sext of the mask.
12059 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
12060 %3 = icmp ugt <4 x i32> %2, <i32 18, i32 18, i32 18, i32 18>
12061 %4 = sext <4 x i1> %3 to <4 x i32>
12065 define <4 x i32> @ult_19_v4i32(<4 x i32> %0) {
12066 ; SSE2-LABEL: ult_19_v4i32:
12068 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12069 ; SSE2-NEXT: psrlw $1, %xmm1
12070 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12071 ; SSE2-NEXT: psubb %xmm1, %xmm0
12072 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12073 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12074 ; SSE2-NEXT: pand %xmm1, %xmm2
12075 ; SSE2-NEXT: psrlw $2, %xmm0
12076 ; SSE2-NEXT: pand %xmm1, %xmm0
12077 ; SSE2-NEXT: paddb %xmm2, %xmm0
12078 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12079 ; SSE2-NEXT: psrlw $4, %xmm1
12080 ; SSE2-NEXT: paddb %xmm0, %xmm1
12081 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12082 ; SSE2-NEXT: pxor %xmm0, %xmm0
12083 ; SSE2-NEXT: movdqa %xmm1, %xmm2
12084 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12085 ; SSE2-NEXT: psadbw %xmm0, %xmm2
12086 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12087 ; SSE2-NEXT: psadbw %xmm0, %xmm1
12088 ; SSE2-NEXT: packuswb %xmm2, %xmm1
12089 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19]
12090 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
12093 ; SSE3-LABEL: ult_19_v4i32:
12095 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12096 ; SSE3-NEXT: psrlw $1, %xmm1
12097 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12098 ; SSE3-NEXT: psubb %xmm1, %xmm0
12099 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12100 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12101 ; SSE3-NEXT: pand %xmm1, %xmm2
12102 ; SSE3-NEXT: psrlw $2, %xmm0
12103 ; SSE3-NEXT: pand %xmm1, %xmm0
12104 ; SSE3-NEXT: paddb %xmm2, %xmm0
12105 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12106 ; SSE3-NEXT: psrlw $4, %xmm1
12107 ; SSE3-NEXT: paddb %xmm0, %xmm1
12108 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12109 ; SSE3-NEXT: pxor %xmm0, %xmm0
12110 ; SSE3-NEXT: movdqa %xmm1, %xmm2
12111 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12112 ; SSE3-NEXT: psadbw %xmm0, %xmm2
12113 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12114 ; SSE3-NEXT: psadbw %xmm0, %xmm1
12115 ; SSE3-NEXT: packuswb %xmm2, %xmm1
12116 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19]
12117 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
12120 ; SSSE3-LABEL: ult_19_v4i32:
12122 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12123 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
12124 ; SSSE3-NEXT: pand %xmm1, %xmm2
12125 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12126 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
12127 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
12128 ; SSSE3-NEXT: psrlw $4, %xmm0
12129 ; SSSE3-NEXT: pand %xmm1, %xmm0
12130 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
12131 ; SSSE3-NEXT: paddb %xmm4, %xmm3
12132 ; SSSE3-NEXT: pxor %xmm0, %xmm0
12133 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
12134 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
12135 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
12136 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
12137 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
12138 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
12139 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19]
12140 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
12143 ; SSE41-LABEL: ult_19_v4i32:
12145 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12146 ; SSE41-NEXT: movdqa %xmm0, %xmm2
12147 ; SSE41-NEXT: pand %xmm1, %xmm2
12148 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12149 ; SSE41-NEXT: movdqa %xmm3, %xmm4
12150 ; SSE41-NEXT: pshufb %xmm2, %xmm4
12151 ; SSE41-NEXT: psrlw $4, %xmm0
12152 ; SSE41-NEXT: pand %xmm1, %xmm0
12153 ; SSE41-NEXT: pshufb %xmm0, %xmm3
12154 ; SSE41-NEXT: paddb %xmm4, %xmm3
12155 ; SSE41-NEXT: pxor %xmm0, %xmm0
12156 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
12157 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
12158 ; SSE41-NEXT: psadbw %xmm0, %xmm3
12159 ; SSE41-NEXT: psadbw %xmm0, %xmm1
12160 ; SSE41-NEXT: packuswb %xmm3, %xmm1
12161 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [19,19,19,19]
12162 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
12165 ; AVX1-LABEL: ult_19_v4i32:
12167 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12168 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
12169 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12170 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12171 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
12172 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
12173 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12174 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12175 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
12176 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12177 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12178 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12179 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12180 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12181 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [19,19,19,19]
12182 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12185 ; AVX2-LABEL: ult_19_v4i32:
12187 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12188 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
12189 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12190 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12191 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
12192 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
12193 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12194 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12195 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
12196 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12197 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12198 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12199 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12200 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12201 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12202 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12205 ; AVX512VPOPCNTDQ-LABEL: ult_19_v4i32:
12206 ; AVX512VPOPCNTDQ: # %bb.0:
12207 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12208 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
12209 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12210 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12211 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
12212 ; AVX512VPOPCNTDQ-NEXT: retq
12214 ; AVX512VPOPCNTDQVL-LABEL: ult_19_v4i32:
12215 ; AVX512VPOPCNTDQVL: # %bb.0:
12216 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
12217 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12218 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12219 ; AVX512VPOPCNTDQVL-NEXT: retq
12221 ; BITALG_NOVLX-LABEL: ult_19_v4i32:
12222 ; BITALG_NOVLX: # %bb.0:
12223 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12224 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
12225 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
12226 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12227 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12228 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12229 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12230 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12231 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12232 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12233 ; BITALG_NOVLX-NEXT: vzeroupper
12234 ; BITALG_NOVLX-NEXT: retq
12236 ; BITALG-LABEL: ult_19_v4i32:
12238 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
12239 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
12240 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12241 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12242 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12243 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12244 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12245 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12246 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12247 ; BITALG-NEXT: retq
12248 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
12249 %3 = icmp ult <4 x i32> %2, <i32 19, i32 19, i32 19, i32 19>
12250 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_19_v4i32 - lane-wise test "popcount(lane) > 19" (unsigned) on a <4 x i32>
; input. The IR at the bottom of this block calls llvm.ctpop.v4i32 on %0,
; compares every lane against the splat constant 19 with icmp ugt, and
; sign-extends the <4 x i1> result, so a lane becomes all-ones when the
; comparison holds and zero otherwise.
; The assertion lines below are autogenerated (see the note on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
12254 define <4 x i32> @ugt_19_v4i32(<4 x i32> %0) {
12255 ; SSE2-LABEL: ugt_19_v4i32:
12257 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12258 ; SSE2-NEXT: psrlw $1, %xmm1
12259 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12260 ; SSE2-NEXT: psubb %xmm1, %xmm0
12261 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12262 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12263 ; SSE2-NEXT: pand %xmm1, %xmm2
12264 ; SSE2-NEXT: psrlw $2, %xmm0
12265 ; SSE2-NEXT: pand %xmm1, %xmm0
12266 ; SSE2-NEXT: paddb %xmm2, %xmm0
12267 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12268 ; SSE2-NEXT: psrlw $4, %xmm1
12269 ; SSE2-NEXT: paddb %xmm1, %xmm0
12270 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12271 ; SSE2-NEXT: pxor %xmm1, %xmm1
12272 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12273 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
12274 ; SSE2-NEXT: psadbw %xmm1, %xmm2
12275 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
12276 ; SSE2-NEXT: psadbw %xmm1, %xmm0
12277 ; SSE2-NEXT: packuswb %xmm2, %xmm0
12278 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12281 ; SSE3-LABEL: ugt_19_v4i32:
12283 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12284 ; SSE3-NEXT: psrlw $1, %xmm1
12285 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12286 ; SSE3-NEXT: psubb %xmm1, %xmm0
12287 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12288 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12289 ; SSE3-NEXT: pand %xmm1, %xmm2
12290 ; SSE3-NEXT: psrlw $2, %xmm0
12291 ; SSE3-NEXT: pand %xmm1, %xmm0
12292 ; SSE3-NEXT: paddb %xmm2, %xmm0
12293 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12294 ; SSE3-NEXT: psrlw $4, %xmm1
12295 ; SSE3-NEXT: paddb %xmm1, %xmm0
12296 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12297 ; SSE3-NEXT: pxor %xmm1, %xmm1
12298 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12299 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
12300 ; SSE3-NEXT: psadbw %xmm1, %xmm2
12301 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
12302 ; SSE3-NEXT: psadbw %xmm1, %xmm0
12303 ; SSE3-NEXT: packuswb %xmm2, %xmm0
12304 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12307 ; SSSE3-LABEL: ugt_19_v4i32:
12309 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12310 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
12311 ; SSSE3-NEXT: pand %xmm2, %xmm3
12312 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12313 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
12314 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
12315 ; SSSE3-NEXT: psrlw $4, %xmm0
12316 ; SSSE3-NEXT: pand %xmm2, %xmm0
12317 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
12318 ; SSSE3-NEXT: paddb %xmm4, %xmm1
12319 ; SSSE3-NEXT: pxor %xmm0, %xmm0
12320 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
12321 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12322 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
12323 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12324 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
12325 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
12326 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12327 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
12330 ; SSE41-LABEL: ugt_19_v4i32:
12332 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12333 ; SSE41-NEXT: movdqa %xmm0, %xmm2
12334 ; SSE41-NEXT: pand %xmm1, %xmm2
12335 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12336 ; SSE41-NEXT: movdqa %xmm3, %xmm4
12337 ; SSE41-NEXT: pshufb %xmm2, %xmm4
12338 ; SSE41-NEXT: psrlw $4, %xmm0
12339 ; SSE41-NEXT: pand %xmm1, %xmm0
12340 ; SSE41-NEXT: pshufb %xmm0, %xmm3
12341 ; SSE41-NEXT: paddb %xmm4, %xmm3
12342 ; SSE41-NEXT: pxor %xmm1, %xmm1
12343 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
12344 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
12345 ; SSE41-NEXT: psadbw %xmm1, %xmm3
12346 ; SSE41-NEXT: psadbw %xmm1, %xmm0
12347 ; SSE41-NEXT: packuswb %xmm3, %xmm0
12348 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12351 ; AVX1-LABEL: ugt_19_v4i32:
12353 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12354 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
12355 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12356 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12357 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
12358 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
12359 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12360 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12361 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
12362 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12363 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12364 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12365 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12366 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12367 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
12370 ; AVX2-LABEL: ugt_19_v4i32:
12372 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12373 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
12374 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12375 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12376 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
12377 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
12378 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12379 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12380 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
12381 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12382 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12383 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12384 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12385 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12386 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12387 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12390 ; AVX512VPOPCNTDQ-LABEL: ugt_19_v4i32:
12391 ; AVX512VPOPCNTDQ: # %bb.0:
12392 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12393 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
12394 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12395 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12396 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
12397 ; AVX512VPOPCNTDQ-NEXT: retq
12399 ; AVX512VPOPCNTDQVL-LABEL: ugt_19_v4i32:
12400 ; AVX512VPOPCNTDQVL: # %bb.0:
12401 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
12402 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12403 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12404 ; AVX512VPOPCNTDQVL-NEXT: retq
12406 ; BITALG_NOVLX-LABEL: ugt_19_v4i32:
12407 ; BITALG_NOVLX: # %bb.0:
12408 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12409 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
12410 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
12411 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12412 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12413 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12414 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12415 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12416 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12417 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12418 ; BITALG_NOVLX-NEXT: vzeroupper
12419 ; BITALG_NOVLX-NEXT: retq
12421 ; BITALG-LABEL: ugt_19_v4i32:
12423 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
12424 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
12425 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12426 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12427 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12428 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12429 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12430 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [19,19,19,19]
12431 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12432 ; BITALG-NEXT: retq
; IR under test: %2 = per-lane popcount, %3 = lane-wise (popcount > 19) as i1,
; %4 = that mask widened to i32 lanes (-1 for true, 0 for false).
; NOTE(review): the ret that consumes %4 is not part of this excerpt - confirm.
12433 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
12434 %3 = icmp ugt <4 x i32> %2, <i32 19, i32 19, i32 19, i32 19>
12435 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_20_v4i32 - lane-wise test "popcount(lane) < 20" (unsigned) on a <4 x i32>
; input. The IR at the bottom of this block calls llvm.ctpop.v4i32 on %0,
; compares every lane against the splat constant 20 with icmp ult, and
; sign-extends the <4 x i1> result, so a lane becomes all-ones when the
; comparison holds and zero otherwise.
; The assertion lines below are autogenerated (see the note on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
12439 define <4 x i32> @ult_20_v4i32(<4 x i32> %0) {
12440 ; SSE2-LABEL: ult_20_v4i32:
12442 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12443 ; SSE2-NEXT: psrlw $1, %xmm1
12444 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12445 ; SSE2-NEXT: psubb %xmm1, %xmm0
12446 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12447 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12448 ; SSE2-NEXT: pand %xmm1, %xmm2
12449 ; SSE2-NEXT: psrlw $2, %xmm0
12450 ; SSE2-NEXT: pand %xmm1, %xmm0
12451 ; SSE2-NEXT: paddb %xmm2, %xmm0
12452 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12453 ; SSE2-NEXT: psrlw $4, %xmm1
12454 ; SSE2-NEXT: paddb %xmm0, %xmm1
12455 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12456 ; SSE2-NEXT: pxor %xmm0, %xmm0
12457 ; SSE2-NEXT: movdqa %xmm1, %xmm2
12458 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12459 ; SSE2-NEXT: psadbw %xmm0, %xmm2
12460 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12461 ; SSE2-NEXT: psadbw %xmm0, %xmm1
12462 ; SSE2-NEXT: packuswb %xmm2, %xmm1
12463 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20]
12464 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
12467 ; SSE3-LABEL: ult_20_v4i32:
12469 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12470 ; SSE3-NEXT: psrlw $1, %xmm1
12471 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12472 ; SSE3-NEXT: psubb %xmm1, %xmm0
12473 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12474 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12475 ; SSE3-NEXT: pand %xmm1, %xmm2
12476 ; SSE3-NEXT: psrlw $2, %xmm0
12477 ; SSE3-NEXT: pand %xmm1, %xmm0
12478 ; SSE3-NEXT: paddb %xmm2, %xmm0
12479 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12480 ; SSE3-NEXT: psrlw $4, %xmm1
12481 ; SSE3-NEXT: paddb %xmm0, %xmm1
12482 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12483 ; SSE3-NEXT: pxor %xmm0, %xmm0
12484 ; SSE3-NEXT: movdqa %xmm1, %xmm2
12485 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12486 ; SSE3-NEXT: psadbw %xmm0, %xmm2
12487 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12488 ; SSE3-NEXT: psadbw %xmm0, %xmm1
12489 ; SSE3-NEXT: packuswb %xmm2, %xmm1
12490 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20]
12491 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
12494 ; SSSE3-LABEL: ult_20_v4i32:
12496 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12497 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
12498 ; SSSE3-NEXT: pand %xmm1, %xmm2
12499 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12500 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
12501 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
12502 ; SSSE3-NEXT: psrlw $4, %xmm0
12503 ; SSSE3-NEXT: pand %xmm1, %xmm0
12504 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
12505 ; SSSE3-NEXT: paddb %xmm4, %xmm3
12506 ; SSSE3-NEXT: pxor %xmm0, %xmm0
12507 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
12508 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
12509 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
12510 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
12511 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
12512 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
12513 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20]
12514 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
12517 ; SSE41-LABEL: ult_20_v4i32:
12519 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12520 ; SSE41-NEXT: movdqa %xmm0, %xmm2
12521 ; SSE41-NEXT: pand %xmm1, %xmm2
12522 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12523 ; SSE41-NEXT: movdqa %xmm3, %xmm4
12524 ; SSE41-NEXT: pshufb %xmm2, %xmm4
12525 ; SSE41-NEXT: psrlw $4, %xmm0
12526 ; SSE41-NEXT: pand %xmm1, %xmm0
12527 ; SSE41-NEXT: pshufb %xmm0, %xmm3
12528 ; SSE41-NEXT: paddb %xmm4, %xmm3
12529 ; SSE41-NEXT: pxor %xmm0, %xmm0
12530 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
12531 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
12532 ; SSE41-NEXT: psadbw %xmm0, %xmm3
12533 ; SSE41-NEXT: psadbw %xmm0, %xmm1
12534 ; SSE41-NEXT: packuswb %xmm3, %xmm1
12535 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [20,20,20,20]
12536 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
12539 ; AVX1-LABEL: ult_20_v4i32:
12541 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12542 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
12543 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12544 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12545 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
12546 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
12547 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12548 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12549 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
12550 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12551 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12552 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12553 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12554 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12555 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [20,20,20,20]
12556 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12559 ; AVX2-LABEL: ult_20_v4i32:
12561 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12562 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
12563 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12564 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12565 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
12566 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
12567 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12568 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12569 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
12570 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12571 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12572 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12573 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12574 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12575 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12576 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12579 ; AVX512VPOPCNTDQ-LABEL: ult_20_v4i32:
12580 ; AVX512VPOPCNTDQ: # %bb.0:
12581 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12582 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
12583 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12584 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12585 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
12586 ; AVX512VPOPCNTDQ-NEXT: retq
12588 ; AVX512VPOPCNTDQVL-LABEL: ult_20_v4i32:
12589 ; AVX512VPOPCNTDQVL: # %bb.0:
12590 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
12591 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12592 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12593 ; AVX512VPOPCNTDQVL-NEXT: retq
12595 ; BITALG_NOVLX-LABEL: ult_20_v4i32:
12596 ; BITALG_NOVLX: # %bb.0:
12597 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12598 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
12599 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
12600 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12601 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12602 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12603 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12604 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12605 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12606 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12607 ; BITALG_NOVLX-NEXT: vzeroupper
12608 ; BITALG_NOVLX-NEXT: retq
12610 ; BITALG-LABEL: ult_20_v4i32:
12612 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
12613 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
12614 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12615 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12616 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12617 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12618 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12619 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12620 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12621 ; BITALG-NEXT: retq
; IR under test: %2 = per-lane popcount, %3 = lane-wise (popcount < 20) as i1,
; %4 = that mask widened to i32 lanes (-1 for true, 0 for false).
; NOTE(review): the ret that consumes %4 is not part of this excerpt - confirm.
12622 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
12623 %3 = icmp ult <4 x i32> %2, <i32 20, i32 20, i32 20, i32 20>
12624 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_20_v4i32 - lane-wise test "popcount(lane) > 20" (unsigned) on a <4 x i32>
; input. The IR at the bottom of this block calls llvm.ctpop.v4i32 on %0,
; compares every lane against the splat constant 20 with icmp ugt, and
; sign-extends the <4 x i1> result, so a lane becomes all-ones when the
; comparison holds and zero otherwise.
; The assertion lines below are autogenerated (see the note on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
12628 define <4 x i32> @ugt_20_v4i32(<4 x i32> %0) {
12629 ; SSE2-LABEL: ugt_20_v4i32:
12631 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12632 ; SSE2-NEXT: psrlw $1, %xmm1
12633 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12634 ; SSE2-NEXT: psubb %xmm1, %xmm0
12635 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12636 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12637 ; SSE2-NEXT: pand %xmm1, %xmm2
12638 ; SSE2-NEXT: psrlw $2, %xmm0
12639 ; SSE2-NEXT: pand %xmm1, %xmm0
12640 ; SSE2-NEXT: paddb %xmm2, %xmm0
12641 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12642 ; SSE2-NEXT: psrlw $4, %xmm1
12643 ; SSE2-NEXT: paddb %xmm1, %xmm0
12644 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12645 ; SSE2-NEXT: pxor %xmm1, %xmm1
12646 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12647 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
12648 ; SSE2-NEXT: psadbw %xmm1, %xmm2
12649 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
12650 ; SSE2-NEXT: psadbw %xmm1, %xmm0
12651 ; SSE2-NEXT: packuswb %xmm2, %xmm0
12652 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12655 ; SSE3-LABEL: ugt_20_v4i32:
12657 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12658 ; SSE3-NEXT: psrlw $1, %xmm1
12659 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12660 ; SSE3-NEXT: psubb %xmm1, %xmm0
12661 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12662 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12663 ; SSE3-NEXT: pand %xmm1, %xmm2
12664 ; SSE3-NEXT: psrlw $2, %xmm0
12665 ; SSE3-NEXT: pand %xmm1, %xmm0
12666 ; SSE3-NEXT: paddb %xmm2, %xmm0
12667 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12668 ; SSE3-NEXT: psrlw $4, %xmm1
12669 ; SSE3-NEXT: paddb %xmm1, %xmm0
12670 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12671 ; SSE3-NEXT: pxor %xmm1, %xmm1
12672 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12673 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
12674 ; SSE3-NEXT: psadbw %xmm1, %xmm2
12675 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
12676 ; SSE3-NEXT: psadbw %xmm1, %xmm0
12677 ; SSE3-NEXT: packuswb %xmm2, %xmm0
12678 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12681 ; SSSE3-LABEL: ugt_20_v4i32:
12683 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12684 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
12685 ; SSSE3-NEXT: pand %xmm2, %xmm3
12686 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12687 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
12688 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
12689 ; SSSE3-NEXT: psrlw $4, %xmm0
12690 ; SSSE3-NEXT: pand %xmm2, %xmm0
12691 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
12692 ; SSSE3-NEXT: paddb %xmm4, %xmm1
12693 ; SSSE3-NEXT: pxor %xmm0, %xmm0
12694 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
12695 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12696 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
12697 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12698 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
12699 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
12700 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12701 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
12704 ; SSE41-LABEL: ugt_20_v4i32:
12706 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12707 ; SSE41-NEXT: movdqa %xmm0, %xmm2
12708 ; SSE41-NEXT: pand %xmm1, %xmm2
12709 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12710 ; SSE41-NEXT: movdqa %xmm3, %xmm4
12711 ; SSE41-NEXT: pshufb %xmm2, %xmm4
12712 ; SSE41-NEXT: psrlw $4, %xmm0
12713 ; SSE41-NEXT: pand %xmm1, %xmm0
12714 ; SSE41-NEXT: pshufb %xmm0, %xmm3
12715 ; SSE41-NEXT: paddb %xmm4, %xmm3
12716 ; SSE41-NEXT: pxor %xmm1, %xmm1
12717 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
12718 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
12719 ; SSE41-NEXT: psadbw %xmm1, %xmm3
12720 ; SSE41-NEXT: psadbw %xmm1, %xmm0
12721 ; SSE41-NEXT: packuswb %xmm3, %xmm0
12722 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12725 ; AVX1-LABEL: ugt_20_v4i32:
12727 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12728 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
12729 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12730 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12731 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
12732 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
12733 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12734 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12735 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
12736 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12737 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12738 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12739 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12740 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12741 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
12744 ; AVX2-LABEL: ugt_20_v4i32:
12746 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12747 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
12748 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12749 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12750 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
12751 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
12752 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12753 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12754 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
12755 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12756 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12757 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12758 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12759 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12760 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12761 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12764 ; AVX512VPOPCNTDQ-LABEL: ugt_20_v4i32:
12765 ; AVX512VPOPCNTDQ: # %bb.0:
12766 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12767 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
12768 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12769 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12770 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
12771 ; AVX512VPOPCNTDQ-NEXT: retq
12773 ; AVX512VPOPCNTDQVL-LABEL: ugt_20_v4i32:
12774 ; AVX512VPOPCNTDQVL: # %bb.0:
12775 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
12776 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12777 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12778 ; AVX512VPOPCNTDQVL-NEXT: retq
12780 ; BITALG_NOVLX-LABEL: ugt_20_v4i32:
12781 ; BITALG_NOVLX: # %bb.0:
12782 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12783 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
12784 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
12785 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12786 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12787 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12788 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12789 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12790 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12791 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12792 ; BITALG_NOVLX-NEXT: vzeroupper
12793 ; BITALG_NOVLX-NEXT: retq
12795 ; BITALG-LABEL: ugt_20_v4i32:
12797 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
12798 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
12799 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12800 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12801 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12802 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12803 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12804 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [20,20,20,20]
12805 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
12806 ; BITALG-NEXT: retq
; IR under test: %2 = per-lane popcount, %3 = lane-wise (popcount > 20) as i1,
; %4 = that mask widened to i32 lanes (-1 for true, 0 for false).
; NOTE(review): the ret that consumes %4 is not part of this excerpt - confirm.
12807 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
12808 %3 = icmp ugt <4 x i32> %2, <i32 20, i32 20, i32 20, i32 20>
12809 %4 = sext <4 x i1> %3 to <4 x i32>
12813 define <4 x i32> @ult_21_v4i32(<4 x i32> %0) {
12814 ; SSE2-LABEL: ult_21_v4i32:
12816 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12817 ; SSE2-NEXT: psrlw $1, %xmm1
12818 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12819 ; SSE2-NEXT: psubb %xmm1, %xmm0
12820 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12821 ; SSE2-NEXT: movdqa %xmm0, %xmm2
12822 ; SSE2-NEXT: pand %xmm1, %xmm2
12823 ; SSE2-NEXT: psrlw $2, %xmm0
12824 ; SSE2-NEXT: pand %xmm1, %xmm0
12825 ; SSE2-NEXT: paddb %xmm2, %xmm0
12826 ; SSE2-NEXT: movdqa %xmm0, %xmm1
12827 ; SSE2-NEXT: psrlw $4, %xmm1
12828 ; SSE2-NEXT: paddb %xmm0, %xmm1
12829 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12830 ; SSE2-NEXT: pxor %xmm0, %xmm0
12831 ; SSE2-NEXT: movdqa %xmm1, %xmm2
12832 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12833 ; SSE2-NEXT: psadbw %xmm0, %xmm2
12834 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12835 ; SSE2-NEXT: psadbw %xmm0, %xmm1
12836 ; SSE2-NEXT: packuswb %xmm2, %xmm1
12837 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21]
12838 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
12841 ; SSE3-LABEL: ult_21_v4i32:
12843 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12844 ; SSE3-NEXT: psrlw $1, %xmm1
12845 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12846 ; SSE3-NEXT: psubb %xmm1, %xmm0
12847 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
12848 ; SSE3-NEXT: movdqa %xmm0, %xmm2
12849 ; SSE3-NEXT: pand %xmm1, %xmm2
12850 ; SSE3-NEXT: psrlw $2, %xmm0
12851 ; SSE3-NEXT: pand %xmm1, %xmm0
12852 ; SSE3-NEXT: paddb %xmm2, %xmm0
12853 ; SSE3-NEXT: movdqa %xmm0, %xmm1
12854 ; SSE3-NEXT: psrlw $4, %xmm1
12855 ; SSE3-NEXT: paddb %xmm0, %xmm1
12856 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
12857 ; SSE3-NEXT: pxor %xmm0, %xmm0
12858 ; SSE3-NEXT: movdqa %xmm1, %xmm2
12859 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
12860 ; SSE3-NEXT: psadbw %xmm0, %xmm2
12861 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
12862 ; SSE3-NEXT: psadbw %xmm0, %xmm1
12863 ; SSE3-NEXT: packuswb %xmm2, %xmm1
12864 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21]
12865 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
12868 ; SSSE3-LABEL: ult_21_v4i32:
12870 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12871 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
12872 ; SSSE3-NEXT: pand %xmm1, %xmm2
12873 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12874 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
12875 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
12876 ; SSSE3-NEXT: psrlw $4, %xmm0
12877 ; SSSE3-NEXT: pand %xmm1, %xmm0
12878 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
12879 ; SSSE3-NEXT: paddb %xmm4, %xmm3
12880 ; SSSE3-NEXT: pxor %xmm0, %xmm0
12881 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
12882 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
12883 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
12884 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
12885 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
12886 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
12887 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21]
12888 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
12891 ; SSE41-LABEL: ult_21_v4i32:
12893 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12894 ; SSE41-NEXT: movdqa %xmm0, %xmm2
12895 ; SSE41-NEXT: pand %xmm1, %xmm2
12896 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12897 ; SSE41-NEXT: movdqa %xmm3, %xmm4
12898 ; SSE41-NEXT: pshufb %xmm2, %xmm4
12899 ; SSE41-NEXT: psrlw $4, %xmm0
12900 ; SSE41-NEXT: pand %xmm1, %xmm0
12901 ; SSE41-NEXT: pshufb %xmm0, %xmm3
12902 ; SSE41-NEXT: paddb %xmm4, %xmm3
12903 ; SSE41-NEXT: pxor %xmm0, %xmm0
12904 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
12905 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
12906 ; SSE41-NEXT: psadbw %xmm0, %xmm3
12907 ; SSE41-NEXT: psadbw %xmm0, %xmm1
12908 ; SSE41-NEXT: packuswb %xmm3, %xmm1
12909 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [21,21,21,21]
12910 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
12913 ; AVX1-LABEL: ult_21_v4i32:
12915 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12916 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
12917 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12918 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12919 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
12920 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
12921 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12922 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12923 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
12924 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12925 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12926 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12927 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12928 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12929 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [21,21,21,21]
12930 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12933 ; AVX2-LABEL: ult_21_v4i32:
12935 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
12936 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
12937 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
12938 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
12939 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
12940 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
12941 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
12942 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
12943 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
12944 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12945 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12946 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12947 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12948 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12949 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
12950 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12953 ; AVX512VPOPCNTDQ-LABEL: ult_21_v4i32:
12954 ; AVX512VPOPCNTDQ: # %bb.0:
12955 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12956 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
12957 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
12958 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12959 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
12960 ; AVX512VPOPCNTDQ-NEXT: retq
12962 ; AVX512VPOPCNTDQVL-LABEL: ult_21_v4i32:
12963 ; AVX512VPOPCNTDQVL: # %bb.0:
12964 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
12965 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
12966 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12967 ; AVX512VPOPCNTDQVL-NEXT: retq
12969 ; BITALG_NOVLX-LABEL: ult_21_v4i32:
12970 ; BITALG_NOVLX: # %bb.0:
12971 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
12972 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
12973 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
12974 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12975 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12976 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12977 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12978 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12979 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
12980 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12981 ; BITALG_NOVLX-NEXT: vzeroupper
12982 ; BITALG_NOVLX-NEXT: retq
12984 ; BITALG-LABEL: ult_21_v4i32:
12986 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
12987 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
12988 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
12989 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
12990 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
12991 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
12992 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
12993 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
12994 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
12995 ; BITALG-NEXT: retq
12996 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
12997 %3 = icmp ult <4 x i32> %2, <i32 21, i32 21, i32 21, i32 21>
12998 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_21_v4i32 checks the x86 lowering of "ctpop(%0) ugt 21" on <4 x i32>,
; with the <4 x i1> compare result sign-extended to <4 x i32>.
; The assertions below are autogenerated by utils/update_llc_test_checks.py
; (per the header of this file); regenerate them rather than editing by hand.
; What the expected code shows, by check prefix family
;   SSE2 and SSE3      shift/mask/add bit counting, then psadbw against a zeroed register
;   SSSE3 through AVX2 pshufb lookups in a 16-entry nibble popcount table, then psadbw
;   AVX512VPOPCNTDQ*   a single vpopcntd (on zmm0 when avx512vl is not enabled)
;   BITALG*            vpopcntb per byte, then psadbw against a zeroed register
; Every variant finishes with a (v)pcmpgtd of the counts against the compare
; constant (a constant-pool operand, or a register broadcast of 21).
13002 define <4 x i32> @ugt_21_v4i32(<4 x i32> %0) {
13003 ; SSE2-LABEL: ugt_21_v4i32:
13005 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13006 ; SSE2-NEXT: psrlw $1, %xmm1
13007 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13008 ; SSE2-NEXT: psubb %xmm1, %xmm0
13009 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13010 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13011 ; SSE2-NEXT: pand %xmm1, %xmm2
13012 ; SSE2-NEXT: psrlw $2, %xmm0
13013 ; SSE2-NEXT: pand %xmm1, %xmm0
13014 ; SSE2-NEXT: paddb %xmm2, %xmm0
13015 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13016 ; SSE2-NEXT: psrlw $4, %xmm1
13017 ; SSE2-NEXT: paddb %xmm1, %xmm0
13018 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13019 ; SSE2-NEXT: pxor %xmm1, %xmm1
13020 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13021 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
13022 ; SSE2-NEXT: psadbw %xmm1, %xmm2
13023 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
13024 ; SSE2-NEXT: psadbw %xmm1, %xmm0
13025 ; SSE2-NEXT: packuswb %xmm2, %xmm0
13026 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13029 ; SSE3-LABEL: ugt_21_v4i32:
13031 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13032 ; SSE3-NEXT: psrlw $1, %xmm1
13033 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13034 ; SSE3-NEXT: psubb %xmm1, %xmm0
13035 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13036 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13037 ; SSE3-NEXT: pand %xmm1, %xmm2
13038 ; SSE3-NEXT: psrlw $2, %xmm0
13039 ; SSE3-NEXT: pand %xmm1, %xmm0
13040 ; SSE3-NEXT: paddb %xmm2, %xmm0
13041 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13042 ; SSE3-NEXT: psrlw $4, %xmm1
13043 ; SSE3-NEXT: paddb %xmm1, %xmm0
13044 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13045 ; SSE3-NEXT: pxor %xmm1, %xmm1
13046 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13047 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
13048 ; SSE3-NEXT: psadbw %xmm1, %xmm2
13049 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
13050 ; SSE3-NEXT: psadbw %xmm1, %xmm0
13051 ; SSE3-NEXT: packuswb %xmm2, %xmm0
13052 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13055 ; SSSE3-LABEL: ugt_21_v4i32:
13057 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13058 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
13059 ; SSSE3-NEXT: pand %xmm2, %xmm3
13060 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13061 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
13062 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
13063 ; SSSE3-NEXT: psrlw $4, %xmm0
13064 ; SSSE3-NEXT: pand %xmm2, %xmm0
13065 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
13066 ; SSSE3-NEXT: paddb %xmm4, %xmm1
13067 ; SSSE3-NEXT: pxor %xmm0, %xmm0
13068 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
13069 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13070 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
13071 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13072 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
13073 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
13074 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13075 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
13078 ; SSE41-LABEL: ugt_21_v4i32:
13080 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13081 ; SSE41-NEXT: movdqa %xmm0, %xmm2
13082 ; SSE41-NEXT: pand %xmm1, %xmm2
13083 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13084 ; SSE41-NEXT: movdqa %xmm3, %xmm4
13085 ; SSE41-NEXT: pshufb %xmm2, %xmm4
13086 ; SSE41-NEXT: psrlw $4, %xmm0
13087 ; SSE41-NEXT: pand %xmm1, %xmm0
13088 ; SSE41-NEXT: pshufb %xmm0, %xmm3
13089 ; SSE41-NEXT: paddb %xmm4, %xmm3
13090 ; SSE41-NEXT: pxor %xmm1, %xmm1
13091 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
13092 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
13093 ; SSE41-NEXT: psadbw %xmm1, %xmm3
13094 ; SSE41-NEXT: psadbw %xmm1, %xmm0
13095 ; SSE41-NEXT: packuswb %xmm3, %xmm0
13096 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13099 ; AVX1-LABEL: ugt_21_v4i32:
13101 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13102 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
13103 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13104 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13105 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
13106 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
13107 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13108 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13109 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
13110 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13111 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13112 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13113 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13114 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13115 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
13118 ; AVX2-LABEL: ugt_21_v4i32:
13120 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13121 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
13122 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13123 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13124 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
13125 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
13126 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13127 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13128 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
13129 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13130 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13131 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13132 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13133 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13134 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13135 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13138 ; AVX512VPOPCNTDQ-LABEL: ugt_21_v4i32:
13139 ; AVX512VPOPCNTDQ: # %bb.0:
13140 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13141 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
13142 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13143 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13144 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
13145 ; AVX512VPOPCNTDQ-NEXT: retq
13147 ; AVX512VPOPCNTDQVL-LABEL: ugt_21_v4i32:
13148 ; AVX512VPOPCNTDQVL: # %bb.0:
13149 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
13150 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13151 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13152 ; AVX512VPOPCNTDQVL-NEXT: retq
13154 ; BITALG_NOVLX-LABEL: ugt_21_v4i32:
13155 ; BITALG_NOVLX: # %bb.0:
13156 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13157 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
13158 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
13159 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13160 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13161 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13162 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13163 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13164 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13165 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13166 ; BITALG_NOVLX-NEXT: vzeroupper
13167 ; BITALG_NOVLX-NEXT: retq
13169 ; BITALG-LABEL: ugt_21_v4i32:
13171 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
13172 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
13173 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13174 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13175 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13176 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13177 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13178 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [21,21,21,21]
13179 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13180 ; BITALG-NEXT: retq
; Count the set bits in each 32-bit lane of the argument.
13181 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
; Lane-wise unsigned compare, true where the bit count is greater than 21.
13182 %3 = icmp ugt <4 x i32> %2, <i32 21, i32 21, i32 21, i32 21>
; Sign-extend each i1 to i32, so a true lane becomes all-ones and a false lane zero.
13183 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_22_v4i32 checks the x86 lowering of "ctpop(%0) ult 22" on <4 x i32>,
; with the <4 x i1> compare result sign-extended to <4 x i32>.
; The assertions below are autogenerated by utils/update_llc_test_checks.py
; (per the header of this file); regenerate them rather than editing by hand.
; What the expected code shows, by check prefix family
;   SSE2 and SSE3      shift/mask/add bit counting, then psadbw against a zeroed register
;   SSSE3 through AVX2 pshufb lookups in a 16-entry nibble popcount table, then psadbw
;   AVX512VPOPCNTDQ*   a single vpopcntd (on zmm0 when avx512vl is not enabled)
;   BITALG*            vpopcntb per byte, then psadbw against a zeroed register
; Every variant materializes a [22,22,22,22] splat in a register and finishes
; with a (v)pcmpgtd of that splat against the counts.
13187 define <4 x i32> @ult_22_v4i32(<4 x i32> %0) {
13188 ; SSE2-LABEL: ult_22_v4i32:
13190 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13191 ; SSE2-NEXT: psrlw $1, %xmm1
13192 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13193 ; SSE2-NEXT: psubb %xmm1, %xmm0
13194 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13195 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13196 ; SSE2-NEXT: pand %xmm1, %xmm2
13197 ; SSE2-NEXT: psrlw $2, %xmm0
13198 ; SSE2-NEXT: pand %xmm1, %xmm0
13199 ; SSE2-NEXT: paddb %xmm2, %xmm0
13200 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13201 ; SSE2-NEXT: psrlw $4, %xmm1
13202 ; SSE2-NEXT: paddb %xmm0, %xmm1
13203 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13204 ; SSE2-NEXT: pxor %xmm0, %xmm0
13205 ; SSE2-NEXT: movdqa %xmm1, %xmm2
13206 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13207 ; SSE2-NEXT: psadbw %xmm0, %xmm2
13208 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13209 ; SSE2-NEXT: psadbw %xmm0, %xmm1
13210 ; SSE2-NEXT: packuswb %xmm2, %xmm1
13211 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22]
13212 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
13215 ; SSE3-LABEL: ult_22_v4i32:
13217 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13218 ; SSE3-NEXT: psrlw $1, %xmm1
13219 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13220 ; SSE3-NEXT: psubb %xmm1, %xmm0
13221 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13222 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13223 ; SSE3-NEXT: pand %xmm1, %xmm2
13224 ; SSE3-NEXT: psrlw $2, %xmm0
13225 ; SSE3-NEXT: pand %xmm1, %xmm0
13226 ; SSE3-NEXT: paddb %xmm2, %xmm0
13227 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13228 ; SSE3-NEXT: psrlw $4, %xmm1
13229 ; SSE3-NEXT: paddb %xmm0, %xmm1
13230 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13231 ; SSE3-NEXT: pxor %xmm0, %xmm0
13232 ; SSE3-NEXT: movdqa %xmm1, %xmm2
13233 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13234 ; SSE3-NEXT: psadbw %xmm0, %xmm2
13235 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13236 ; SSE3-NEXT: psadbw %xmm0, %xmm1
13237 ; SSE3-NEXT: packuswb %xmm2, %xmm1
13238 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22]
13239 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
13242 ; SSSE3-LABEL: ult_22_v4i32:
13244 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13245 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
13246 ; SSSE3-NEXT: pand %xmm1, %xmm2
13247 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13248 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
13249 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
13250 ; SSSE3-NEXT: psrlw $4, %xmm0
13251 ; SSSE3-NEXT: pand %xmm1, %xmm0
13252 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
13253 ; SSSE3-NEXT: paddb %xmm4, %xmm3
13254 ; SSSE3-NEXT: pxor %xmm0, %xmm0
13255 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
13256 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
13257 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
13258 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
13259 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
13260 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
13261 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22]
13262 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
13265 ; SSE41-LABEL: ult_22_v4i32:
13267 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13268 ; SSE41-NEXT: movdqa %xmm0, %xmm2
13269 ; SSE41-NEXT: pand %xmm1, %xmm2
13270 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13271 ; SSE41-NEXT: movdqa %xmm3, %xmm4
13272 ; SSE41-NEXT: pshufb %xmm2, %xmm4
13273 ; SSE41-NEXT: psrlw $4, %xmm0
13274 ; SSE41-NEXT: pand %xmm1, %xmm0
13275 ; SSE41-NEXT: pshufb %xmm0, %xmm3
13276 ; SSE41-NEXT: paddb %xmm4, %xmm3
13277 ; SSE41-NEXT: pxor %xmm0, %xmm0
13278 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
13279 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
13280 ; SSE41-NEXT: psadbw %xmm0, %xmm3
13281 ; SSE41-NEXT: psadbw %xmm0, %xmm1
13282 ; SSE41-NEXT: packuswb %xmm3, %xmm1
13283 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [22,22,22,22]
13284 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
13287 ; AVX1-LABEL: ult_22_v4i32:
13289 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13290 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
13291 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13292 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13293 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
13294 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
13295 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13296 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13297 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
13298 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13299 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13300 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13301 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13302 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13303 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [22,22,22,22]
13304 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13307 ; AVX2-LABEL: ult_22_v4i32:
13309 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13310 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
13311 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13312 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13313 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
13314 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
13315 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13316 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13317 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
13318 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13319 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13320 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13321 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13322 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13323 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13324 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13327 ; AVX512VPOPCNTDQ-LABEL: ult_22_v4i32:
13328 ; AVX512VPOPCNTDQ: # %bb.0:
13329 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13330 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
13331 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13332 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13333 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
13334 ; AVX512VPOPCNTDQ-NEXT: retq
13336 ; AVX512VPOPCNTDQVL-LABEL: ult_22_v4i32:
13337 ; AVX512VPOPCNTDQVL: # %bb.0:
13338 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
13339 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13340 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13341 ; AVX512VPOPCNTDQVL-NEXT: retq
13343 ; BITALG_NOVLX-LABEL: ult_22_v4i32:
13344 ; BITALG_NOVLX: # %bb.0:
13345 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13346 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
13347 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
13348 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13349 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13350 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13351 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13352 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13353 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13354 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13355 ; BITALG_NOVLX-NEXT: vzeroupper
13356 ; BITALG_NOVLX-NEXT: retq
13358 ; BITALG-LABEL: ult_22_v4i32:
13360 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
13361 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
13362 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13363 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13364 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13365 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13366 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13367 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13368 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13369 ; BITALG-NEXT: retq
; Count the set bits in each 32-bit lane of the argument.
13370 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
; Lane-wise unsigned compare, true where the bit count is less than 22.
13371 %3 = icmp ult <4 x i32> %2, <i32 22, i32 22, i32 22, i32 22>
; Sign-extend each i1 to i32, so a true lane becomes all-ones and a false lane zero.
13372 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_22_v4i32 checks the x86 lowering of "ctpop(%0) ugt 22" on <4 x i32>,
; with the <4 x i1> compare result sign-extended to <4 x i32>.
; The assertions below are autogenerated by utils/update_llc_test_checks.py
; (per the header of this file); regenerate them rather than editing by hand.
; What the expected code shows, by check prefix family
;   SSE2 and SSE3      shift/mask/add bit counting, then psadbw against a zeroed register
;   SSSE3 through AVX2 pshufb lookups in a 16-entry nibble popcount table, then psadbw
;   AVX512VPOPCNTDQ*   a single vpopcntd (on zmm0 when avx512vl is not enabled)
;   BITALG*            vpopcntb per byte, then psadbw against a zeroed register
; Every variant finishes with a (v)pcmpgtd of the counts against the compare
; constant (a constant-pool operand, or a register broadcast of 22).
13376 define <4 x i32> @ugt_22_v4i32(<4 x i32> %0) {
13377 ; SSE2-LABEL: ugt_22_v4i32:
13379 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13380 ; SSE2-NEXT: psrlw $1, %xmm1
13381 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13382 ; SSE2-NEXT: psubb %xmm1, %xmm0
13383 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13384 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13385 ; SSE2-NEXT: pand %xmm1, %xmm2
13386 ; SSE2-NEXT: psrlw $2, %xmm0
13387 ; SSE2-NEXT: pand %xmm1, %xmm0
13388 ; SSE2-NEXT: paddb %xmm2, %xmm0
13389 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13390 ; SSE2-NEXT: psrlw $4, %xmm1
13391 ; SSE2-NEXT: paddb %xmm1, %xmm0
13392 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13393 ; SSE2-NEXT: pxor %xmm1, %xmm1
13394 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13395 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
13396 ; SSE2-NEXT: psadbw %xmm1, %xmm2
13397 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
13398 ; SSE2-NEXT: psadbw %xmm1, %xmm0
13399 ; SSE2-NEXT: packuswb %xmm2, %xmm0
13400 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13403 ; SSE3-LABEL: ugt_22_v4i32:
13405 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13406 ; SSE3-NEXT: psrlw $1, %xmm1
13407 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13408 ; SSE3-NEXT: psubb %xmm1, %xmm0
13409 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13410 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13411 ; SSE3-NEXT: pand %xmm1, %xmm2
13412 ; SSE3-NEXT: psrlw $2, %xmm0
13413 ; SSE3-NEXT: pand %xmm1, %xmm0
13414 ; SSE3-NEXT: paddb %xmm2, %xmm0
13415 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13416 ; SSE3-NEXT: psrlw $4, %xmm1
13417 ; SSE3-NEXT: paddb %xmm1, %xmm0
13418 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13419 ; SSE3-NEXT: pxor %xmm1, %xmm1
13420 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13421 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
13422 ; SSE3-NEXT: psadbw %xmm1, %xmm2
13423 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
13424 ; SSE3-NEXT: psadbw %xmm1, %xmm0
13425 ; SSE3-NEXT: packuswb %xmm2, %xmm0
13426 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13429 ; SSSE3-LABEL: ugt_22_v4i32:
13431 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13432 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
13433 ; SSSE3-NEXT: pand %xmm2, %xmm3
13434 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13435 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
13436 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
13437 ; SSSE3-NEXT: psrlw $4, %xmm0
13438 ; SSSE3-NEXT: pand %xmm2, %xmm0
13439 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
13440 ; SSSE3-NEXT: paddb %xmm4, %xmm1
13441 ; SSSE3-NEXT: pxor %xmm0, %xmm0
13442 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
13443 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13444 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
13445 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13446 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
13447 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
13448 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13449 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
13452 ; SSE41-LABEL: ugt_22_v4i32:
13454 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13455 ; SSE41-NEXT: movdqa %xmm0, %xmm2
13456 ; SSE41-NEXT: pand %xmm1, %xmm2
13457 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13458 ; SSE41-NEXT: movdqa %xmm3, %xmm4
13459 ; SSE41-NEXT: pshufb %xmm2, %xmm4
13460 ; SSE41-NEXT: psrlw $4, %xmm0
13461 ; SSE41-NEXT: pand %xmm1, %xmm0
13462 ; SSE41-NEXT: pshufb %xmm0, %xmm3
13463 ; SSE41-NEXT: paddb %xmm4, %xmm3
13464 ; SSE41-NEXT: pxor %xmm1, %xmm1
13465 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
13466 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
13467 ; SSE41-NEXT: psadbw %xmm1, %xmm3
13468 ; SSE41-NEXT: psadbw %xmm1, %xmm0
13469 ; SSE41-NEXT: packuswb %xmm3, %xmm0
13470 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13473 ; AVX1-LABEL: ugt_22_v4i32:
13475 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13476 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
13477 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13478 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13479 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
13480 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
13481 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13482 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13483 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
13484 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13485 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13486 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13487 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13488 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13489 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
13492 ; AVX2-LABEL: ugt_22_v4i32:
13494 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13495 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
13496 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13497 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13498 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
13499 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
13500 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13501 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13502 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
13503 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13504 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13505 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13506 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13507 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13508 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13509 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13512 ; AVX512VPOPCNTDQ-LABEL: ugt_22_v4i32:
13513 ; AVX512VPOPCNTDQ: # %bb.0:
13514 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13515 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
13516 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13517 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13518 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
13519 ; AVX512VPOPCNTDQ-NEXT: retq
13521 ; AVX512VPOPCNTDQVL-LABEL: ugt_22_v4i32:
13522 ; AVX512VPOPCNTDQVL: # %bb.0:
13523 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
13524 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13525 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13526 ; AVX512VPOPCNTDQVL-NEXT: retq
13528 ; BITALG_NOVLX-LABEL: ugt_22_v4i32:
13529 ; BITALG_NOVLX: # %bb.0:
13530 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13531 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
13532 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
13533 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13534 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13535 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13536 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13537 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13538 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13539 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13540 ; BITALG_NOVLX-NEXT: vzeroupper
13541 ; BITALG_NOVLX-NEXT: retq
13543 ; BITALG-LABEL: ugt_22_v4i32:
13545 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
13546 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
13547 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13548 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13549 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13550 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13551 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13552 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [22,22,22,22]
13553 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13554 ; BITALG-NEXT: retq
; Count the set bits in each 32-bit lane of the argument.
13555 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
; Lane-wise unsigned compare, true where the bit count is greater than 22.
13556 %3 = icmp ugt <4 x i32> %2, <i32 22, i32 22, i32 22, i32 22>
; Sign-extend each i1 to i32, so a true lane becomes all-ones and a false lane zero.
13557 %4 = sext <4 x i1> %3 to <4 x i32>
13561 define <4 x i32> @ult_23_v4i32(<4 x i32> %0) {
13562 ; SSE2-LABEL: ult_23_v4i32:
13564 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13565 ; SSE2-NEXT: psrlw $1, %xmm1
13566 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13567 ; SSE2-NEXT: psubb %xmm1, %xmm0
13568 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13569 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13570 ; SSE2-NEXT: pand %xmm1, %xmm2
13571 ; SSE2-NEXT: psrlw $2, %xmm0
13572 ; SSE2-NEXT: pand %xmm1, %xmm0
13573 ; SSE2-NEXT: paddb %xmm2, %xmm0
13574 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13575 ; SSE2-NEXT: psrlw $4, %xmm1
13576 ; SSE2-NEXT: paddb %xmm0, %xmm1
13577 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13578 ; SSE2-NEXT: pxor %xmm0, %xmm0
13579 ; SSE2-NEXT: movdqa %xmm1, %xmm2
13580 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13581 ; SSE2-NEXT: psadbw %xmm0, %xmm2
13582 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13583 ; SSE2-NEXT: psadbw %xmm0, %xmm1
13584 ; SSE2-NEXT: packuswb %xmm2, %xmm1
13585 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23]
13586 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
13589 ; SSE3-LABEL: ult_23_v4i32:
13591 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13592 ; SSE3-NEXT: psrlw $1, %xmm1
13593 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13594 ; SSE3-NEXT: psubb %xmm1, %xmm0
13595 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13596 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13597 ; SSE3-NEXT: pand %xmm1, %xmm2
13598 ; SSE3-NEXT: psrlw $2, %xmm0
13599 ; SSE3-NEXT: pand %xmm1, %xmm0
13600 ; SSE3-NEXT: paddb %xmm2, %xmm0
13601 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13602 ; SSE3-NEXT: psrlw $4, %xmm1
13603 ; SSE3-NEXT: paddb %xmm0, %xmm1
13604 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13605 ; SSE3-NEXT: pxor %xmm0, %xmm0
13606 ; SSE3-NEXT: movdqa %xmm1, %xmm2
13607 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13608 ; SSE3-NEXT: psadbw %xmm0, %xmm2
13609 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13610 ; SSE3-NEXT: psadbw %xmm0, %xmm1
13611 ; SSE3-NEXT: packuswb %xmm2, %xmm1
13612 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23]
13613 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
13616 ; SSSE3-LABEL: ult_23_v4i32:
13618 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13619 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
13620 ; SSSE3-NEXT: pand %xmm1, %xmm2
13621 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13622 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
13623 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
13624 ; SSSE3-NEXT: psrlw $4, %xmm0
13625 ; SSSE3-NEXT: pand %xmm1, %xmm0
13626 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
13627 ; SSSE3-NEXT: paddb %xmm4, %xmm3
13628 ; SSSE3-NEXT: pxor %xmm0, %xmm0
13629 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
13630 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
13631 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
13632 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
13633 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
13634 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
13635 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23]
13636 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
13639 ; SSE41-LABEL: ult_23_v4i32:
13641 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13642 ; SSE41-NEXT: movdqa %xmm0, %xmm2
13643 ; SSE41-NEXT: pand %xmm1, %xmm2
13644 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13645 ; SSE41-NEXT: movdqa %xmm3, %xmm4
13646 ; SSE41-NEXT: pshufb %xmm2, %xmm4
13647 ; SSE41-NEXT: psrlw $4, %xmm0
13648 ; SSE41-NEXT: pand %xmm1, %xmm0
13649 ; SSE41-NEXT: pshufb %xmm0, %xmm3
13650 ; SSE41-NEXT: paddb %xmm4, %xmm3
13651 ; SSE41-NEXT: pxor %xmm0, %xmm0
13652 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
13653 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
13654 ; SSE41-NEXT: psadbw %xmm0, %xmm3
13655 ; SSE41-NEXT: psadbw %xmm0, %xmm1
13656 ; SSE41-NEXT: packuswb %xmm3, %xmm1
13657 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [23,23,23,23]
13658 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
13661 ; AVX1-LABEL: ult_23_v4i32:
13663 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13664 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
13665 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13666 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13667 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
13668 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
13669 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13670 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13671 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
13672 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13673 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13674 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13675 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13676 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13677 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [23,23,23,23]
13678 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13681 ; AVX2-LABEL: ult_23_v4i32:
13683 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13684 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
13685 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13686 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13687 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
13688 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
13689 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13690 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13691 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
13692 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13693 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13694 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13695 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13696 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13697 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
13698 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13701 ; AVX512VPOPCNTDQ-LABEL: ult_23_v4i32:
13702 ; AVX512VPOPCNTDQ: # %bb.0:
13703 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13704 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
13705 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
13706 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13707 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
13708 ; AVX512VPOPCNTDQ-NEXT: retq
13710 ; AVX512VPOPCNTDQVL-LABEL: ult_23_v4i32:
13711 ; AVX512VPOPCNTDQVL: # %bb.0:
13712 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
13713 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
13714 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13715 ; AVX512VPOPCNTDQVL-NEXT: retq
13717 ; BITALG_NOVLX-LABEL: ult_23_v4i32:
13718 ; BITALG_NOVLX: # %bb.0:
13719 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13720 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
13721 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
13722 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13723 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13724 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13725 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13726 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13727 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
13728 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13729 ; BITALG_NOVLX-NEXT: vzeroupper
13730 ; BITALG_NOVLX-NEXT: retq
13732 ; BITALG-LABEL: ult_23_v4i32:
13734 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
13735 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
13736 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13737 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13738 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13739 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13740 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13741 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
13742 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
13743 ; BITALG-NEXT: retq
13744 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
13745 %3 = icmp ult <4 x i32> %2, <i32 23, i32 23, i32 23, i32 23>
13746 %4 = sext <4 x i1> %3 to <4 x i32>
; Codegen test for a per-lane population-count threshold on a 4 x i32 vector.
; The IR at the end of this function computes %2 = llvm.ctpop.v4i32(%0),
; %3 = icmp ugt %2, splat(23), and %4 = sext of the <4 x i1> mask to <4 x i32>,
; so a lane of %4 is all-ones when that element has more than 23 bits set and
; zero otherwise.
; The per-prefix assertion lines below are autogenerated (the note at the top
; of the file names utils/update_llc_test_checks.py); regenerate them with that
; script instead of editing them by hand.
; NOTE(review): the ret and the closing brace are not visible in this excerpt;
; presumably %4 is the returned value - confirm.
13750 define <4 x i32> @ugt_23_v4i32(<4 x i32> %0) {
13751 ; SSE2-LABEL: ugt_23_v4i32:
13753 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13754 ; SSE2-NEXT: psrlw $1, %xmm1
13755 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13756 ; SSE2-NEXT: psubb %xmm1, %xmm0
13757 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13758 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13759 ; SSE2-NEXT: pand %xmm1, %xmm2
13760 ; SSE2-NEXT: psrlw $2, %xmm0
13761 ; SSE2-NEXT: pand %xmm1, %xmm0
13762 ; SSE2-NEXT: paddb %xmm2, %xmm0
13763 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13764 ; SSE2-NEXT: psrlw $4, %xmm1
13765 ; SSE2-NEXT: paddb %xmm1, %xmm0
13766 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13767 ; SSE2-NEXT: pxor %xmm1, %xmm1
13768 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13769 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
13770 ; SSE2-NEXT: psadbw %xmm1, %xmm2
13771 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
13772 ; SSE2-NEXT: psadbw %xmm1, %xmm0
13773 ; SSE2-NEXT: packuswb %xmm2, %xmm0
13774 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13777 ; SSE3-LABEL: ugt_23_v4i32:
13779 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13780 ; SSE3-NEXT: psrlw $1, %xmm1
13781 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13782 ; SSE3-NEXT: psubb %xmm1, %xmm0
13783 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13784 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13785 ; SSE3-NEXT: pand %xmm1, %xmm2
13786 ; SSE3-NEXT: psrlw $2, %xmm0
13787 ; SSE3-NEXT: pand %xmm1, %xmm0
13788 ; SSE3-NEXT: paddb %xmm2, %xmm0
13789 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13790 ; SSE3-NEXT: psrlw $4, %xmm1
13791 ; SSE3-NEXT: paddb %xmm1, %xmm0
13792 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13793 ; SSE3-NEXT: pxor %xmm1, %xmm1
13794 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13795 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
13796 ; SSE3-NEXT: psadbw %xmm1, %xmm2
13797 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
13798 ; SSE3-NEXT: psadbw %xmm1, %xmm0
13799 ; SSE3-NEXT: packuswb %xmm2, %xmm0
13800 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13803 ; SSSE3-LABEL: ugt_23_v4i32:
13805 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13806 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
13807 ; SSSE3-NEXT: pand %xmm2, %xmm3
13808 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13809 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
13810 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
13811 ; SSSE3-NEXT: psrlw $4, %xmm0
13812 ; SSSE3-NEXT: pand %xmm2, %xmm0
13813 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
13814 ; SSSE3-NEXT: paddb %xmm4, %xmm1
13815 ; SSSE3-NEXT: pxor %xmm0, %xmm0
13816 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
13817 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13818 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
13819 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13820 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
13821 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
13822 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13823 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
13826 ; SSE41-LABEL: ugt_23_v4i32:
13828 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13829 ; SSE41-NEXT: movdqa %xmm0, %xmm2
13830 ; SSE41-NEXT: pand %xmm1, %xmm2
13831 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13832 ; SSE41-NEXT: movdqa %xmm3, %xmm4
13833 ; SSE41-NEXT: pshufb %xmm2, %xmm4
13834 ; SSE41-NEXT: psrlw $4, %xmm0
13835 ; SSE41-NEXT: pand %xmm1, %xmm0
13836 ; SSE41-NEXT: pshufb %xmm0, %xmm3
13837 ; SSE41-NEXT: paddb %xmm4, %xmm3
13838 ; SSE41-NEXT: pxor %xmm1, %xmm1
13839 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
13840 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
13841 ; SSE41-NEXT: psadbw %xmm1, %xmm3
13842 ; SSE41-NEXT: psadbw %xmm1, %xmm0
13843 ; SSE41-NEXT: packuswb %xmm3, %xmm0
13844 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13847 ; AVX1-LABEL: ugt_23_v4i32:
13849 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13850 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
13851 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13852 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13853 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
13854 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
13855 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13856 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13857 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
13858 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13859 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13860 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13861 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13862 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13863 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
13866 ; AVX2-LABEL: ugt_23_v4i32:
13868 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13869 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
13870 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13871 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
13872 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
13873 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
13874 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
13875 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
13876 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
13877 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13878 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13879 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13880 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13881 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13882 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
13883 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13886 ; AVX512VPOPCNTDQ-LABEL: ugt_23_v4i32:
13887 ; AVX512VPOPCNTDQ: # %bb.0:
13888 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13889 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
13890 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
13891 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13892 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
13893 ; AVX512VPOPCNTDQ-NEXT: retq
13895 ; AVX512VPOPCNTDQVL-LABEL: ugt_23_v4i32:
13896 ; AVX512VPOPCNTDQVL: # %bb.0:
13897 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
13898 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
13899 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13900 ; AVX512VPOPCNTDQVL-NEXT: retq
13902 ; BITALG_NOVLX-LABEL: ugt_23_v4i32:
13903 ; BITALG_NOVLX: # %bb.0:
13904 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
13905 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
13906 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
13907 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13908 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13909 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13910 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13911 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13912 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
13913 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13914 ; BITALG_NOVLX-NEXT: vzeroupper
13915 ; BITALG_NOVLX-NEXT: retq
13917 ; BITALG-LABEL: ugt_23_v4i32:
13919 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
13920 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
13921 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13922 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
13923 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
13924 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
13925 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
13926 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [23,23,23,23]
13927 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
13928 ; BITALG-NEXT: retq
13929 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
13930 %3 = icmp ugt <4 x i32> %2, <i32 23, i32 23, i32 23, i32 23>
13931 %4 = sext <4 x i1> %3 to <4 x i32>
; Codegen test for a per-lane population-count threshold on a 4 x i32 vector.
; The IR at the end of this function computes %2 = llvm.ctpop.v4i32(%0),
; %3 = icmp ult %2, splat(24), and %4 = sext of the <4 x i1> mask to <4 x i32>,
; so a lane of %4 is all-ones when that element has fewer than 24 bits set and
; zero otherwise.
; The per-prefix assertion lines below are autogenerated (the note at the top
; of the file names utils/update_llc_test_checks.py); regenerate them with that
; script instead of editing them by hand.
; NOTE(review): the ret and the closing brace are not visible in this excerpt;
; presumably %4 is the returned value - confirm.
13935 define <4 x i32> @ult_24_v4i32(<4 x i32> %0) {
13936 ; SSE2-LABEL: ult_24_v4i32:
13938 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13939 ; SSE2-NEXT: psrlw $1, %xmm1
13940 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13941 ; SSE2-NEXT: psubb %xmm1, %xmm0
13942 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13943 ; SSE2-NEXT: movdqa %xmm0, %xmm2
13944 ; SSE2-NEXT: pand %xmm1, %xmm2
13945 ; SSE2-NEXT: psrlw $2, %xmm0
13946 ; SSE2-NEXT: pand %xmm1, %xmm0
13947 ; SSE2-NEXT: paddb %xmm2, %xmm0
13948 ; SSE2-NEXT: movdqa %xmm0, %xmm1
13949 ; SSE2-NEXT: psrlw $4, %xmm1
13950 ; SSE2-NEXT: paddb %xmm0, %xmm1
13951 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13952 ; SSE2-NEXT: pxor %xmm0, %xmm0
13953 ; SSE2-NEXT: movdqa %xmm1, %xmm2
13954 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13955 ; SSE2-NEXT: psadbw %xmm0, %xmm2
13956 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13957 ; SSE2-NEXT: psadbw %xmm0, %xmm1
13958 ; SSE2-NEXT: packuswb %xmm2, %xmm1
13959 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24]
13960 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
13963 ; SSE3-LABEL: ult_24_v4i32:
13965 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13966 ; SSE3-NEXT: psrlw $1, %xmm1
13967 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13968 ; SSE3-NEXT: psubb %xmm1, %xmm0
13969 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
13970 ; SSE3-NEXT: movdqa %xmm0, %xmm2
13971 ; SSE3-NEXT: pand %xmm1, %xmm2
13972 ; SSE3-NEXT: psrlw $2, %xmm0
13973 ; SSE3-NEXT: pand %xmm1, %xmm0
13974 ; SSE3-NEXT: paddb %xmm2, %xmm0
13975 ; SSE3-NEXT: movdqa %xmm0, %xmm1
13976 ; SSE3-NEXT: psrlw $4, %xmm1
13977 ; SSE3-NEXT: paddb %xmm0, %xmm1
13978 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
13979 ; SSE3-NEXT: pxor %xmm0, %xmm0
13980 ; SSE3-NEXT: movdqa %xmm1, %xmm2
13981 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
13982 ; SSE3-NEXT: psadbw %xmm0, %xmm2
13983 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
13984 ; SSE3-NEXT: psadbw %xmm0, %xmm1
13985 ; SSE3-NEXT: packuswb %xmm2, %xmm1
13986 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24]
13987 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
13990 ; SSSE3-LABEL: ult_24_v4i32:
13992 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
13993 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
13994 ; SSSE3-NEXT: pand %xmm1, %xmm2
13995 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
13996 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
13997 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
13998 ; SSSE3-NEXT: psrlw $4, %xmm0
13999 ; SSSE3-NEXT: pand %xmm1, %xmm0
14000 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
14001 ; SSSE3-NEXT: paddb %xmm4, %xmm3
14002 ; SSSE3-NEXT: pxor %xmm0, %xmm0
14003 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
14004 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
14005 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
14006 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
14007 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
14008 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
14009 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24]
14010 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
14013 ; SSE41-LABEL: ult_24_v4i32:
14015 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14016 ; SSE41-NEXT: movdqa %xmm0, %xmm2
14017 ; SSE41-NEXT: pand %xmm1, %xmm2
14018 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14019 ; SSE41-NEXT: movdqa %xmm3, %xmm4
14020 ; SSE41-NEXT: pshufb %xmm2, %xmm4
14021 ; SSE41-NEXT: psrlw $4, %xmm0
14022 ; SSE41-NEXT: pand %xmm1, %xmm0
14023 ; SSE41-NEXT: pshufb %xmm0, %xmm3
14024 ; SSE41-NEXT: paddb %xmm4, %xmm3
14025 ; SSE41-NEXT: pxor %xmm0, %xmm0
14026 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
14027 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
14028 ; SSE41-NEXT: psadbw %xmm0, %xmm3
14029 ; SSE41-NEXT: psadbw %xmm0, %xmm1
14030 ; SSE41-NEXT: packuswb %xmm3, %xmm1
14031 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [24,24,24,24]
14032 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
14035 ; AVX1-LABEL: ult_24_v4i32:
14037 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14038 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
14039 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14040 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14041 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
14042 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
14043 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14044 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14045 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
14046 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14047 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14048 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14049 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14050 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14051 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [24,24,24,24]
14052 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14055 ; AVX2-LABEL: ult_24_v4i32:
14057 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14058 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
14059 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14060 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14061 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
14062 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
14063 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14064 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14065 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
14066 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14067 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14068 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14069 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14070 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14071 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14072 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14075 ; AVX512VPOPCNTDQ-LABEL: ult_24_v4i32:
14076 ; AVX512VPOPCNTDQ: # %bb.0:
14077 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14078 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
14079 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14080 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14081 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
14082 ; AVX512VPOPCNTDQ-NEXT: retq
14084 ; AVX512VPOPCNTDQVL-LABEL: ult_24_v4i32:
14085 ; AVX512VPOPCNTDQVL: # %bb.0:
14086 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
14087 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14088 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14089 ; AVX512VPOPCNTDQVL-NEXT: retq
14091 ; BITALG_NOVLX-LABEL: ult_24_v4i32:
14092 ; BITALG_NOVLX: # %bb.0:
14093 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14094 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
14095 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
14096 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14097 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14098 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14099 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14100 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14101 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14102 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14103 ; BITALG_NOVLX-NEXT: vzeroupper
14104 ; BITALG_NOVLX-NEXT: retq
14106 ; BITALG-LABEL: ult_24_v4i32:
14108 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
14109 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
14110 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14111 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14112 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14113 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14114 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14115 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14116 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14117 ; BITALG-NEXT: retq
14118 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
14119 %3 = icmp ult <4 x i32> %2, <i32 24, i32 24, i32 24, i32 24>
14120 %4 = sext <4 x i1> %3 to <4 x i32>
; Codegen test for a per-lane population-count threshold on a 4 x i32 vector.
; The IR at the end of this function computes %2 = llvm.ctpop.v4i32(%0),
; %3 = icmp ugt %2, splat(24), and %4 = sext of the <4 x i1> mask to <4 x i32>,
; so a lane of %4 is all-ones when that element has more than 24 bits set and
; zero otherwise.
; The per-prefix assertion lines below are autogenerated (the note at the top
; of the file names utils/update_llc_test_checks.py); regenerate them with that
; script instead of editing them by hand.
; NOTE(review): the ret and the closing brace are not visible in this excerpt;
; presumably %4 is the returned value - confirm.
14124 define <4 x i32> @ugt_24_v4i32(<4 x i32> %0) {
14125 ; SSE2-LABEL: ugt_24_v4i32:
14127 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14128 ; SSE2-NEXT: psrlw $1, %xmm1
14129 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14130 ; SSE2-NEXT: psubb %xmm1, %xmm0
14131 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14132 ; SSE2-NEXT: movdqa %xmm0, %xmm2
14133 ; SSE2-NEXT: pand %xmm1, %xmm2
14134 ; SSE2-NEXT: psrlw $2, %xmm0
14135 ; SSE2-NEXT: pand %xmm1, %xmm0
14136 ; SSE2-NEXT: paddb %xmm2, %xmm0
14137 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14138 ; SSE2-NEXT: psrlw $4, %xmm1
14139 ; SSE2-NEXT: paddb %xmm1, %xmm0
14140 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14141 ; SSE2-NEXT: pxor %xmm1, %xmm1
14142 ; SSE2-NEXT: movdqa %xmm0, %xmm2
14143 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
14144 ; SSE2-NEXT: psadbw %xmm1, %xmm2
14145 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
14146 ; SSE2-NEXT: psadbw %xmm1, %xmm0
14147 ; SSE2-NEXT: packuswb %xmm2, %xmm0
14148 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14151 ; SSE3-LABEL: ugt_24_v4i32:
14153 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14154 ; SSE3-NEXT: psrlw $1, %xmm1
14155 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14156 ; SSE3-NEXT: psubb %xmm1, %xmm0
14157 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14158 ; SSE3-NEXT: movdqa %xmm0, %xmm2
14159 ; SSE3-NEXT: pand %xmm1, %xmm2
14160 ; SSE3-NEXT: psrlw $2, %xmm0
14161 ; SSE3-NEXT: pand %xmm1, %xmm0
14162 ; SSE3-NEXT: paddb %xmm2, %xmm0
14163 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14164 ; SSE3-NEXT: psrlw $4, %xmm1
14165 ; SSE3-NEXT: paddb %xmm1, %xmm0
14166 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14167 ; SSE3-NEXT: pxor %xmm1, %xmm1
14168 ; SSE3-NEXT: movdqa %xmm0, %xmm2
14169 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
14170 ; SSE3-NEXT: psadbw %xmm1, %xmm2
14171 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
14172 ; SSE3-NEXT: psadbw %xmm1, %xmm0
14173 ; SSE3-NEXT: packuswb %xmm2, %xmm0
14174 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14177 ; SSSE3-LABEL: ugt_24_v4i32:
14179 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14180 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
14181 ; SSSE3-NEXT: pand %xmm2, %xmm3
14182 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14183 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
14184 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
14185 ; SSSE3-NEXT: psrlw $4, %xmm0
14186 ; SSSE3-NEXT: pand %xmm2, %xmm0
14187 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
14188 ; SSSE3-NEXT: paddb %xmm4, %xmm1
14189 ; SSSE3-NEXT: pxor %xmm0, %xmm0
14190 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
14191 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14192 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
14193 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14194 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
14195 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
14196 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14197 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
14200 ; SSE41-LABEL: ugt_24_v4i32:
14202 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14203 ; SSE41-NEXT: movdqa %xmm0, %xmm2
14204 ; SSE41-NEXT: pand %xmm1, %xmm2
14205 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14206 ; SSE41-NEXT: movdqa %xmm3, %xmm4
14207 ; SSE41-NEXT: pshufb %xmm2, %xmm4
14208 ; SSE41-NEXT: psrlw $4, %xmm0
14209 ; SSE41-NEXT: pand %xmm1, %xmm0
14210 ; SSE41-NEXT: pshufb %xmm0, %xmm3
14211 ; SSE41-NEXT: paddb %xmm4, %xmm3
14212 ; SSE41-NEXT: pxor %xmm1, %xmm1
14213 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
14214 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
14215 ; SSE41-NEXT: psadbw %xmm1, %xmm3
14216 ; SSE41-NEXT: psadbw %xmm1, %xmm0
14217 ; SSE41-NEXT: packuswb %xmm3, %xmm0
14218 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14221 ; AVX1-LABEL: ugt_24_v4i32:
14223 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14224 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
14225 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14226 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14227 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
14228 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
14229 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14230 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14231 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
14232 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14233 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14234 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14235 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14236 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14237 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
14240 ; AVX2-LABEL: ugt_24_v4i32:
14242 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14243 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
14244 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14245 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14246 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
14247 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
14248 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14249 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14250 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
14251 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14252 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14253 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14254 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14255 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14256 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14257 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14260 ; AVX512VPOPCNTDQ-LABEL: ugt_24_v4i32:
14261 ; AVX512VPOPCNTDQ: # %bb.0:
14262 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14263 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
14264 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14265 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14266 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
14267 ; AVX512VPOPCNTDQ-NEXT: retq
14269 ; AVX512VPOPCNTDQVL-LABEL: ugt_24_v4i32:
14270 ; AVX512VPOPCNTDQVL: # %bb.0:
14271 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
14272 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14273 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14274 ; AVX512VPOPCNTDQVL-NEXT: retq
14276 ; BITALG_NOVLX-LABEL: ugt_24_v4i32:
14277 ; BITALG_NOVLX: # %bb.0:
14278 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14279 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
14280 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
14281 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14282 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14283 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14284 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14285 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14286 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14287 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14288 ; BITALG_NOVLX-NEXT: vzeroupper
14289 ; BITALG_NOVLX-NEXT: retq
14291 ; BITALG-LABEL: ugt_24_v4i32:
14293 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
14294 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
14295 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14296 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14297 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14298 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14299 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14300 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [24,24,24,24]
14301 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14302 ; BITALG-NEXT: retq
14303 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
14304 %3 = icmp ugt <4 x i32> %2, <i32 24, i32 24, i32 24, i32 24>
14305 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_25_v4i32: popcount each i32 lane of %0, test "count < 25" (unsigned), and
; sign-extend the <4 x i1> result into a <4 x i32> lane mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py (see the
; file header); regenerate them instead of editing them by hand.
; Expected lowering per -mattr setting, as recorded below:
;   baseline, +sse3      - shift/mask/add byte popcount, then psadbw sums
;   +ssse3 through +avx2 - pshufb nibble table [0,1,1,2,...,4], then psadbw sums
;   +avx512vpopcntdq     - vpopcntd (on zmm0 without +avx512vl, on xmm0 with it)
;   +avx512bitalg        - vpopcntb, then vpsadbw sums
; The final compare is a signed (v)pcmpgtd computing "splat(25) > count".
14309 define <4 x i32> @ult_25_v4i32(<4 x i32> %0) {
14310 ; SSE2-LABEL: ult_25_v4i32:
14312 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14313 ; SSE2-NEXT: psrlw $1, %xmm1
14314 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14315 ; SSE2-NEXT: psubb %xmm1, %xmm0
14316 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14317 ; SSE2-NEXT: movdqa %xmm0, %xmm2
14318 ; SSE2-NEXT: pand %xmm1, %xmm2
14319 ; SSE2-NEXT: psrlw $2, %xmm0
14320 ; SSE2-NEXT: pand %xmm1, %xmm0
14321 ; SSE2-NEXT: paddb %xmm2, %xmm0
14322 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14323 ; SSE2-NEXT: psrlw $4, %xmm1
14324 ; SSE2-NEXT: paddb %xmm0, %xmm1
14325 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14326 ; SSE2-NEXT: pxor %xmm0, %xmm0
14327 ; SSE2-NEXT: movdqa %xmm1, %xmm2
14328 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14329 ; SSE2-NEXT: psadbw %xmm0, %xmm2
14330 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14331 ; SSE2-NEXT: psadbw %xmm0, %xmm1
14332 ; SSE2-NEXT: packuswb %xmm2, %xmm1
14333 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25]
14334 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
14337 ; SSE3-LABEL: ult_25_v4i32:
14339 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14340 ; SSE3-NEXT: psrlw $1, %xmm1
14341 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14342 ; SSE3-NEXT: psubb %xmm1, %xmm0
14343 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14344 ; SSE3-NEXT: movdqa %xmm0, %xmm2
14345 ; SSE3-NEXT: pand %xmm1, %xmm2
14346 ; SSE3-NEXT: psrlw $2, %xmm0
14347 ; SSE3-NEXT: pand %xmm1, %xmm0
14348 ; SSE3-NEXT: paddb %xmm2, %xmm0
14349 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14350 ; SSE3-NEXT: psrlw $4, %xmm1
14351 ; SSE3-NEXT: paddb %xmm0, %xmm1
14352 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14353 ; SSE3-NEXT: pxor %xmm0, %xmm0
14354 ; SSE3-NEXT: movdqa %xmm1, %xmm2
14355 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14356 ; SSE3-NEXT: psadbw %xmm0, %xmm2
14357 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14358 ; SSE3-NEXT: psadbw %xmm0, %xmm1
14359 ; SSE3-NEXT: packuswb %xmm2, %xmm1
14360 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25]
14361 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
14364 ; SSSE3-LABEL: ult_25_v4i32:
14366 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14367 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
14368 ; SSSE3-NEXT: pand %xmm1, %xmm2
14369 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14370 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
14371 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
14372 ; SSSE3-NEXT: psrlw $4, %xmm0
14373 ; SSSE3-NEXT: pand %xmm1, %xmm0
14374 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
14375 ; SSSE3-NEXT: paddb %xmm4, %xmm3
14376 ; SSSE3-NEXT: pxor %xmm0, %xmm0
14377 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
14378 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
14379 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
14380 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
14381 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
14382 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
14383 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25]
14384 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
14387 ; SSE41-LABEL: ult_25_v4i32:
14389 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14390 ; SSE41-NEXT: movdqa %xmm0, %xmm2
14391 ; SSE41-NEXT: pand %xmm1, %xmm2
14392 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14393 ; SSE41-NEXT: movdqa %xmm3, %xmm4
14394 ; SSE41-NEXT: pshufb %xmm2, %xmm4
14395 ; SSE41-NEXT: psrlw $4, %xmm0
14396 ; SSE41-NEXT: pand %xmm1, %xmm0
14397 ; SSE41-NEXT: pshufb %xmm0, %xmm3
14398 ; SSE41-NEXT: paddb %xmm4, %xmm3
14399 ; SSE41-NEXT: pxor %xmm0, %xmm0
14400 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
14401 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
14402 ; SSE41-NEXT: psadbw %xmm0, %xmm3
14403 ; SSE41-NEXT: psadbw %xmm0, %xmm1
14404 ; SSE41-NEXT: packuswb %xmm3, %xmm1
14405 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [25,25,25,25]
14406 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
14409 ; AVX1-LABEL: ult_25_v4i32:
14411 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14412 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
14413 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14414 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14415 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
14416 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
14417 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14418 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14419 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
14420 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14421 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14422 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14423 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14424 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14425 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [25,25,25,25]
14426 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14429 ; AVX2-LABEL: ult_25_v4i32:
14431 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14432 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
14433 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14434 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14435 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
14436 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
14437 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14438 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14439 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
14440 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14441 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14442 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14443 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14444 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14445 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14446 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14449 ; AVX512VPOPCNTDQ-LABEL: ult_25_v4i32:
14450 ; AVX512VPOPCNTDQ: # %bb.0:
14451 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14452 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
14453 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14454 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14455 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
14456 ; AVX512VPOPCNTDQ-NEXT: retq
14458 ; AVX512VPOPCNTDQVL-LABEL: ult_25_v4i32:
14459 ; AVX512VPOPCNTDQVL: # %bb.0:
14460 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
14461 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14462 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14463 ; AVX512VPOPCNTDQVL-NEXT: retq
14465 ; BITALG_NOVLX-LABEL: ult_25_v4i32:
14466 ; BITALG_NOVLX: # %bb.0:
14467 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14468 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
14469 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
14470 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14471 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14472 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14473 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14474 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14475 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14476 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14477 ; BITALG_NOVLX-NEXT: vzeroupper
14478 ; BITALG_NOVLX-NEXT: retq
14480 ; BITALG-LABEL: ult_25_v4i32:
14482 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
14483 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
14484 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14485 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14486 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14487 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14488 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14489 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14490 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14491 ; BITALG-NEXT: retq
; IR under test: per-lane ctpop, unsigned "< 25" compare, then sext of the i1 mask.
14492 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
14493 %3 = icmp ult <4 x i32> %2, <i32 25, i32 25, i32 25, i32 25>
14494 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_25_v4i32: popcount each i32 lane of %0, test "count > 25" (unsigned), and
; sign-extend the <4 x i1> result into a <4 x i32> lane mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py (see the
; file header); regenerate them instead of editing them by hand.
; Expected lowering per -mattr setting, as recorded below:
;   baseline, +sse3      - shift/mask/add byte popcount, then psadbw sums
;   +ssse3 through +avx2 - pshufb nibble table [0,1,1,2,...,4], then psadbw sums
;   +avx512vpopcntdq     - vpopcntd (on zmm0 without +avx512vl, on xmm0 with it)
;   +avx512bitalg        - vpopcntb, then vpsadbw sums
; The final compare is a signed (v)pcmpgtd computing "count > threshold". From
; +avx2 onward the threshold is a register splat [25,25,25,25]; on the older
; configurations it is a constant-pool memory operand matched only by regex
; (presumably the same splat - the checks do not show its value).
14498 define <4 x i32> @ugt_25_v4i32(<4 x i32> %0) {
14499 ; SSE2-LABEL: ugt_25_v4i32:
14501 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14502 ; SSE2-NEXT: psrlw $1, %xmm1
14503 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14504 ; SSE2-NEXT: psubb %xmm1, %xmm0
14505 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14506 ; SSE2-NEXT: movdqa %xmm0, %xmm2
14507 ; SSE2-NEXT: pand %xmm1, %xmm2
14508 ; SSE2-NEXT: psrlw $2, %xmm0
14509 ; SSE2-NEXT: pand %xmm1, %xmm0
14510 ; SSE2-NEXT: paddb %xmm2, %xmm0
14511 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14512 ; SSE2-NEXT: psrlw $4, %xmm1
14513 ; SSE2-NEXT: paddb %xmm1, %xmm0
14514 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14515 ; SSE2-NEXT: pxor %xmm1, %xmm1
14516 ; SSE2-NEXT: movdqa %xmm0, %xmm2
14517 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
14518 ; SSE2-NEXT: psadbw %xmm1, %xmm2
14519 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
14520 ; SSE2-NEXT: psadbw %xmm1, %xmm0
14521 ; SSE2-NEXT: packuswb %xmm2, %xmm0
14522 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14525 ; SSE3-LABEL: ugt_25_v4i32:
14527 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14528 ; SSE3-NEXT: psrlw $1, %xmm1
14529 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14530 ; SSE3-NEXT: psubb %xmm1, %xmm0
14531 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14532 ; SSE3-NEXT: movdqa %xmm0, %xmm2
14533 ; SSE3-NEXT: pand %xmm1, %xmm2
14534 ; SSE3-NEXT: psrlw $2, %xmm0
14535 ; SSE3-NEXT: pand %xmm1, %xmm0
14536 ; SSE3-NEXT: paddb %xmm2, %xmm0
14537 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14538 ; SSE3-NEXT: psrlw $4, %xmm1
14539 ; SSE3-NEXT: paddb %xmm1, %xmm0
14540 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14541 ; SSE3-NEXT: pxor %xmm1, %xmm1
14542 ; SSE3-NEXT: movdqa %xmm0, %xmm2
14543 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
14544 ; SSE3-NEXT: psadbw %xmm1, %xmm2
14545 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
14546 ; SSE3-NEXT: psadbw %xmm1, %xmm0
14547 ; SSE3-NEXT: packuswb %xmm2, %xmm0
14548 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14551 ; SSSE3-LABEL: ugt_25_v4i32:
14553 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14554 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
14555 ; SSSE3-NEXT: pand %xmm2, %xmm3
14556 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14557 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
14558 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
14559 ; SSSE3-NEXT: psrlw $4, %xmm0
14560 ; SSSE3-NEXT: pand %xmm2, %xmm0
14561 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
14562 ; SSSE3-NEXT: paddb %xmm4, %xmm1
14563 ; SSSE3-NEXT: pxor %xmm0, %xmm0
14564 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
14565 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14566 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
14567 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14568 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
14569 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
14570 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14571 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
14574 ; SSE41-LABEL: ugt_25_v4i32:
14576 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14577 ; SSE41-NEXT: movdqa %xmm0, %xmm2
14578 ; SSE41-NEXT: pand %xmm1, %xmm2
14579 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14580 ; SSE41-NEXT: movdqa %xmm3, %xmm4
14581 ; SSE41-NEXT: pshufb %xmm2, %xmm4
14582 ; SSE41-NEXT: psrlw $4, %xmm0
14583 ; SSE41-NEXT: pand %xmm1, %xmm0
14584 ; SSE41-NEXT: pshufb %xmm0, %xmm3
14585 ; SSE41-NEXT: paddb %xmm4, %xmm3
14586 ; SSE41-NEXT: pxor %xmm1, %xmm1
14587 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
14588 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
14589 ; SSE41-NEXT: psadbw %xmm1, %xmm3
14590 ; SSE41-NEXT: psadbw %xmm1, %xmm0
14591 ; SSE41-NEXT: packuswb %xmm3, %xmm0
14592 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14595 ; AVX1-LABEL: ugt_25_v4i32:
14597 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14598 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
14599 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14600 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14601 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
14602 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
14603 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14604 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14605 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
14606 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14607 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14608 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14609 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14610 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14611 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
14614 ; AVX2-LABEL: ugt_25_v4i32:
14616 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14617 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
14618 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14619 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14620 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
14621 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
14622 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14623 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14624 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
14625 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14626 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14627 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14628 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14629 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14630 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14631 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14634 ; AVX512VPOPCNTDQ-LABEL: ugt_25_v4i32:
14635 ; AVX512VPOPCNTDQ: # %bb.0:
14636 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14637 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
14638 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14639 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14640 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
14641 ; AVX512VPOPCNTDQ-NEXT: retq
14643 ; AVX512VPOPCNTDQVL-LABEL: ugt_25_v4i32:
14644 ; AVX512VPOPCNTDQVL: # %bb.0:
14645 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
14646 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14647 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14648 ; AVX512VPOPCNTDQVL-NEXT: retq
14650 ; BITALG_NOVLX-LABEL: ugt_25_v4i32:
14651 ; BITALG_NOVLX: # %bb.0:
14652 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14653 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
14654 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
14655 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14656 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14657 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14658 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14659 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14660 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14661 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14662 ; BITALG_NOVLX-NEXT: vzeroupper
14663 ; BITALG_NOVLX-NEXT: retq
14665 ; BITALG-LABEL: ugt_25_v4i32:
14667 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
14668 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
14669 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14670 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14671 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14672 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14673 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14674 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [25,25,25,25]
14675 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
14676 ; BITALG-NEXT: retq
; IR under test: per-lane ctpop, unsigned "> 25" compare, then sext of the i1 mask.
14677 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
14678 %3 = icmp ugt <4 x i32> %2, <i32 25, i32 25, i32 25, i32 25>
14679 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_26_v4i32: popcount each i32 lane of %0, test "count < 26" (unsigned), and
; sign-extend the <4 x i1> result into a <4 x i32> lane mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py (see the
; file header); regenerate them instead of editing them by hand.
; Expected lowering per -mattr setting, as recorded below:
;   baseline, +sse3      - shift/mask/add byte popcount, then psadbw sums
;   +ssse3 through +avx2 - pshufb nibble table [0,1,1,2,...,4], then psadbw sums
;   +avx512vpopcntdq     - vpopcntd (on zmm0 without +avx512vl, on xmm0 with it)
;   +avx512bitalg        - vpopcntb, then vpsadbw sums
; The final compare is a signed (v)pcmpgtd computing "splat(26) > count".
14683 define <4 x i32> @ult_26_v4i32(<4 x i32> %0) {
14684 ; SSE2-LABEL: ult_26_v4i32:
14686 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14687 ; SSE2-NEXT: psrlw $1, %xmm1
14688 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14689 ; SSE2-NEXT: psubb %xmm1, %xmm0
14690 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14691 ; SSE2-NEXT: movdqa %xmm0, %xmm2
14692 ; SSE2-NEXT: pand %xmm1, %xmm2
14693 ; SSE2-NEXT: psrlw $2, %xmm0
14694 ; SSE2-NEXT: pand %xmm1, %xmm0
14695 ; SSE2-NEXT: paddb %xmm2, %xmm0
14696 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14697 ; SSE2-NEXT: psrlw $4, %xmm1
14698 ; SSE2-NEXT: paddb %xmm0, %xmm1
14699 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14700 ; SSE2-NEXT: pxor %xmm0, %xmm0
14701 ; SSE2-NEXT: movdqa %xmm1, %xmm2
14702 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14703 ; SSE2-NEXT: psadbw %xmm0, %xmm2
14704 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14705 ; SSE2-NEXT: psadbw %xmm0, %xmm1
14706 ; SSE2-NEXT: packuswb %xmm2, %xmm1
14707 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26]
14708 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
14711 ; SSE3-LABEL: ult_26_v4i32:
14713 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14714 ; SSE3-NEXT: psrlw $1, %xmm1
14715 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14716 ; SSE3-NEXT: psubb %xmm1, %xmm0
14717 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14718 ; SSE3-NEXT: movdqa %xmm0, %xmm2
14719 ; SSE3-NEXT: pand %xmm1, %xmm2
14720 ; SSE3-NEXT: psrlw $2, %xmm0
14721 ; SSE3-NEXT: pand %xmm1, %xmm0
14722 ; SSE3-NEXT: paddb %xmm2, %xmm0
14723 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14724 ; SSE3-NEXT: psrlw $4, %xmm1
14725 ; SSE3-NEXT: paddb %xmm0, %xmm1
14726 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14727 ; SSE3-NEXT: pxor %xmm0, %xmm0
14728 ; SSE3-NEXT: movdqa %xmm1, %xmm2
14729 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14730 ; SSE3-NEXT: psadbw %xmm0, %xmm2
14731 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14732 ; SSE3-NEXT: psadbw %xmm0, %xmm1
14733 ; SSE3-NEXT: packuswb %xmm2, %xmm1
14734 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26]
14735 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
14738 ; SSSE3-LABEL: ult_26_v4i32:
14740 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14741 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
14742 ; SSSE3-NEXT: pand %xmm1, %xmm2
14743 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14744 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
14745 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
14746 ; SSSE3-NEXT: psrlw $4, %xmm0
14747 ; SSSE3-NEXT: pand %xmm1, %xmm0
14748 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
14749 ; SSSE3-NEXT: paddb %xmm4, %xmm3
14750 ; SSSE3-NEXT: pxor %xmm0, %xmm0
14751 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
14752 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
14753 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
14754 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
14755 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
14756 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
14757 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26]
14758 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
14761 ; SSE41-LABEL: ult_26_v4i32:
14763 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14764 ; SSE41-NEXT: movdqa %xmm0, %xmm2
14765 ; SSE41-NEXT: pand %xmm1, %xmm2
14766 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14767 ; SSE41-NEXT: movdqa %xmm3, %xmm4
14768 ; SSE41-NEXT: pshufb %xmm2, %xmm4
14769 ; SSE41-NEXT: psrlw $4, %xmm0
14770 ; SSE41-NEXT: pand %xmm1, %xmm0
14771 ; SSE41-NEXT: pshufb %xmm0, %xmm3
14772 ; SSE41-NEXT: paddb %xmm4, %xmm3
14773 ; SSE41-NEXT: pxor %xmm0, %xmm0
14774 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
14775 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
14776 ; SSE41-NEXT: psadbw %xmm0, %xmm3
14777 ; SSE41-NEXT: psadbw %xmm0, %xmm1
14778 ; SSE41-NEXT: packuswb %xmm3, %xmm1
14779 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [26,26,26,26]
14780 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
14783 ; AVX1-LABEL: ult_26_v4i32:
14785 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14786 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
14787 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14788 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14789 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
14790 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
14791 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14792 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14793 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
14794 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14795 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14796 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14797 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14798 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14799 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [26,26,26,26]
14800 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14803 ; AVX2-LABEL: ult_26_v4i32:
14805 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14806 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
14807 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14808 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14809 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
14810 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
14811 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14812 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14813 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
14814 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14815 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14816 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14817 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14818 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14819 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
14820 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14823 ; AVX512VPOPCNTDQ-LABEL: ult_26_v4i32:
14824 ; AVX512VPOPCNTDQ: # %bb.0:
14825 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14826 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
14827 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
14828 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14829 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
14830 ; AVX512VPOPCNTDQ-NEXT: retq
14832 ; AVX512VPOPCNTDQVL-LABEL: ult_26_v4i32:
14833 ; AVX512VPOPCNTDQVL: # %bb.0:
14834 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
14835 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
14836 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14837 ; AVX512VPOPCNTDQVL-NEXT: retq
14839 ; BITALG_NOVLX-LABEL: ult_26_v4i32:
14840 ; BITALG_NOVLX: # %bb.0:
14841 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
14842 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
14843 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
14844 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14845 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14846 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14847 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14848 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14849 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
14850 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14851 ; BITALG_NOVLX-NEXT: vzeroupper
14852 ; BITALG_NOVLX-NEXT: retq
14854 ; BITALG-LABEL: ult_26_v4i32:
14856 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
14857 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
14858 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14859 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14860 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14861 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14862 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14863 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
14864 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
14865 ; BITALG-NEXT: retq
; IR under test: per-lane ctpop, unsigned "< 26" compare, then sext of the i1 mask.
14866 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
14867 %3 = icmp ult <4 x i32> %2, <i32 26, i32 26, i32 26, i32 26>
14868 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_26_v4i32: popcount each i32 lane of %0, test "count > 26" (unsigned), and
; sign-extend the <4 x i1> result into a <4 x i32> lane mask.
; The check lines are autogenerated by utils/update_llc_test_checks.py (see the
; file header); regenerate them instead of editing them by hand.
; Expected lowering per -mattr setting, as recorded below:
;   baseline, +sse3      - shift/mask/add byte popcount, then psadbw sums
;   +ssse3 through +avx2 - pshufb nibble table [0,1,1,2,...,4], then psadbw sums
;   +avx512vpopcntdq     - vpopcntd (on zmm0 without +avx512vl, on xmm0 with it)
;   +avx512bitalg        - vpopcntb, then vpsadbw sums
; The final compare is a signed (v)pcmpgtd computing "count > threshold". From
; +avx2 onward the threshold is a register splat [26,26,26,26]; on the older
; configurations it is a constant-pool memory operand matched only by regex
; (presumably the same splat - the checks do not show its value).
14872 define <4 x i32> @ugt_26_v4i32(<4 x i32> %0) {
14873 ; SSE2-LABEL: ugt_26_v4i32:
14875 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14876 ; SSE2-NEXT: psrlw $1, %xmm1
14877 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14878 ; SSE2-NEXT: psubb %xmm1, %xmm0
14879 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14880 ; SSE2-NEXT: movdqa %xmm0, %xmm2
14881 ; SSE2-NEXT: pand %xmm1, %xmm2
14882 ; SSE2-NEXT: psrlw $2, %xmm0
14883 ; SSE2-NEXT: pand %xmm1, %xmm0
14884 ; SSE2-NEXT: paddb %xmm2, %xmm0
14885 ; SSE2-NEXT: movdqa %xmm0, %xmm1
14886 ; SSE2-NEXT: psrlw $4, %xmm1
14887 ; SSE2-NEXT: paddb %xmm1, %xmm0
14888 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14889 ; SSE2-NEXT: pxor %xmm1, %xmm1
14890 ; SSE2-NEXT: movdqa %xmm0, %xmm2
14891 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
14892 ; SSE2-NEXT: psadbw %xmm1, %xmm2
14893 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
14894 ; SSE2-NEXT: psadbw %xmm1, %xmm0
14895 ; SSE2-NEXT: packuswb %xmm2, %xmm0
14896 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14899 ; SSE3-LABEL: ugt_26_v4i32:
14901 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14902 ; SSE3-NEXT: psrlw $1, %xmm1
14903 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14904 ; SSE3-NEXT: psubb %xmm1, %xmm0
14905 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
14906 ; SSE3-NEXT: movdqa %xmm0, %xmm2
14907 ; SSE3-NEXT: pand %xmm1, %xmm2
14908 ; SSE3-NEXT: psrlw $2, %xmm0
14909 ; SSE3-NEXT: pand %xmm1, %xmm0
14910 ; SSE3-NEXT: paddb %xmm2, %xmm0
14911 ; SSE3-NEXT: movdqa %xmm0, %xmm1
14912 ; SSE3-NEXT: psrlw $4, %xmm1
14913 ; SSE3-NEXT: paddb %xmm1, %xmm0
14914 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14915 ; SSE3-NEXT: pxor %xmm1, %xmm1
14916 ; SSE3-NEXT: movdqa %xmm0, %xmm2
14917 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
14918 ; SSE3-NEXT: psadbw %xmm1, %xmm2
14919 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
14920 ; SSE3-NEXT: psadbw %xmm1, %xmm0
14921 ; SSE3-NEXT: packuswb %xmm2, %xmm0
14922 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14925 ; SSSE3-LABEL: ugt_26_v4i32:
14927 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14928 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
14929 ; SSSE3-NEXT: pand %xmm2, %xmm3
14930 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14931 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
14932 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
14933 ; SSSE3-NEXT: psrlw $4, %xmm0
14934 ; SSSE3-NEXT: pand %xmm2, %xmm0
14935 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
14936 ; SSSE3-NEXT: paddb %xmm4, %xmm1
14937 ; SSSE3-NEXT: pxor %xmm0, %xmm0
14938 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
14939 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
14940 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
14941 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
14942 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
14943 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
14944 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
14945 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
14948 ; SSE41-LABEL: ugt_26_v4i32:
14950 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14951 ; SSE41-NEXT: movdqa %xmm0, %xmm2
14952 ; SSE41-NEXT: pand %xmm1, %xmm2
14953 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14954 ; SSE41-NEXT: movdqa %xmm3, %xmm4
14955 ; SSE41-NEXT: pshufb %xmm2, %xmm4
14956 ; SSE41-NEXT: psrlw $4, %xmm0
14957 ; SSE41-NEXT: pand %xmm1, %xmm0
14958 ; SSE41-NEXT: pshufb %xmm0, %xmm3
14959 ; SSE41-NEXT: paddb %xmm4, %xmm3
14960 ; SSE41-NEXT: pxor %xmm1, %xmm1
14961 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
14962 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
14963 ; SSE41-NEXT: psadbw %xmm1, %xmm3
14964 ; SSE41-NEXT: psadbw %xmm1, %xmm0
14965 ; SSE41-NEXT: packuswb %xmm3, %xmm0
14966 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
14969 ; AVX1-LABEL: ugt_26_v4i32:
14971 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14972 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
14973 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14974 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14975 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
14976 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
14977 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14978 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14979 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
14980 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
14981 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
14982 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
14983 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
14984 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
14985 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
14988 ; AVX2-LABEL: ugt_26_v4i32:
14990 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
14991 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
14992 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
14993 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
14994 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
14995 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
14996 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
14997 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
14998 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
14999 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15000 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15001 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15002 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15003 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15004 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
15005 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15008 ; AVX512VPOPCNTDQ-LABEL: ugt_26_v4i32:
15009 ; AVX512VPOPCNTDQ: # %bb.0:
15010 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15011 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
15012 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
15013 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15014 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
15015 ; AVX512VPOPCNTDQ-NEXT: retq
15017 ; AVX512VPOPCNTDQVL-LABEL: ugt_26_v4i32:
15018 ; AVX512VPOPCNTDQVL: # %bb.0:
15019 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
15020 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
15021 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15022 ; AVX512VPOPCNTDQVL-NEXT: retq
15024 ; BITALG_NOVLX-LABEL: ugt_26_v4i32:
15025 ; BITALG_NOVLX: # %bb.0:
15026 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15027 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
15028 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
15029 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15030 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15031 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15032 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15033 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15034 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
15035 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15036 ; BITALG_NOVLX-NEXT: vzeroupper
15037 ; BITALG_NOVLX-NEXT: retq
15039 ; BITALG-LABEL: ugt_26_v4i32:
15041 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
15042 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
15043 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15044 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15045 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15046 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15047 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15048 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [26,26,26,26]
15049 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15050 ; BITALG-NEXT: retq
; IR under test: per-lane ctpop, unsigned "> 26" compare, then sext of the i1 mask.
15051 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
15052 %3 = icmp ugt <4 x i32> %2, <i32 26, i32 26, i32 26, i32 26>
15053 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_27_v4i32: takes the per-lane population count of a <4 x i32> vector,
; compares each count unsigned-less-than a splat of 27, and sign-extends the
; <4 x i1> compare result to a <4 x i32> mask.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
; In the checked code the constant 27 is materialized in a register and used as
; the first source of the greater-than compare (27 > popcount).
; NOTE(review): the checked code uses the signed pcmpgtd/vpcmpgtd for an unsigned
; predicate - presumably valid because a 32-bit popcount never exceeds 32; confirm.
15057 define <4 x i32> @ult_27_v4i32(<4 x i32> %0) {
15058 ; SSE2-LABEL: ult_27_v4i32:
15060 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15061 ; SSE2-NEXT: psrlw $1, %xmm1
15062 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15063 ; SSE2-NEXT: psubb %xmm1, %xmm0
15064 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15065 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15066 ; SSE2-NEXT: pand %xmm1, %xmm2
15067 ; SSE2-NEXT: psrlw $2, %xmm0
15068 ; SSE2-NEXT: pand %xmm1, %xmm0
15069 ; SSE2-NEXT: paddb %xmm2, %xmm0
15070 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15071 ; SSE2-NEXT: psrlw $4, %xmm1
15072 ; SSE2-NEXT: paddb %xmm0, %xmm1
15073 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15074 ; SSE2-NEXT: pxor %xmm0, %xmm0
15075 ; SSE2-NEXT: movdqa %xmm1, %xmm2
15076 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15077 ; SSE2-NEXT: psadbw %xmm0, %xmm2
15078 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15079 ; SSE2-NEXT: psadbw %xmm0, %xmm1
15080 ; SSE2-NEXT: packuswb %xmm2, %xmm1
15081 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27]
15082 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
15085 ; SSE3-LABEL: ult_27_v4i32:
15087 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15088 ; SSE3-NEXT: psrlw $1, %xmm1
15089 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15090 ; SSE3-NEXT: psubb %xmm1, %xmm0
15091 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15092 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15093 ; SSE3-NEXT: pand %xmm1, %xmm2
15094 ; SSE3-NEXT: psrlw $2, %xmm0
15095 ; SSE3-NEXT: pand %xmm1, %xmm0
15096 ; SSE3-NEXT: paddb %xmm2, %xmm0
15097 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15098 ; SSE3-NEXT: psrlw $4, %xmm1
15099 ; SSE3-NEXT: paddb %xmm0, %xmm1
15100 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15101 ; SSE3-NEXT: pxor %xmm0, %xmm0
15102 ; SSE3-NEXT: movdqa %xmm1, %xmm2
15103 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15104 ; SSE3-NEXT: psadbw %xmm0, %xmm2
15105 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15106 ; SSE3-NEXT: psadbw %xmm0, %xmm1
15107 ; SSE3-NEXT: packuswb %xmm2, %xmm1
15108 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27]
15109 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
15112 ; SSSE3-LABEL: ult_27_v4i32:
15114 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15115 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
15116 ; SSSE3-NEXT: pand %xmm1, %xmm2
15117 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15118 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
15119 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
15120 ; SSSE3-NEXT: psrlw $4, %xmm0
15121 ; SSSE3-NEXT: pand %xmm1, %xmm0
15122 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
15123 ; SSSE3-NEXT: paddb %xmm4, %xmm3
15124 ; SSSE3-NEXT: pxor %xmm0, %xmm0
15125 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
15126 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
15127 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
15128 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
15129 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
15130 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
15131 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27]
15132 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
15135 ; SSE41-LABEL: ult_27_v4i32:
15137 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15138 ; SSE41-NEXT: movdqa %xmm0, %xmm2
15139 ; SSE41-NEXT: pand %xmm1, %xmm2
15140 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15141 ; SSE41-NEXT: movdqa %xmm3, %xmm4
15142 ; SSE41-NEXT: pshufb %xmm2, %xmm4
15143 ; SSE41-NEXT: psrlw $4, %xmm0
15144 ; SSE41-NEXT: pand %xmm1, %xmm0
15145 ; SSE41-NEXT: pshufb %xmm0, %xmm3
15146 ; SSE41-NEXT: paddb %xmm4, %xmm3
15147 ; SSE41-NEXT: pxor %xmm0, %xmm0
15148 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
15149 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
15150 ; SSE41-NEXT: psadbw %xmm0, %xmm3
15151 ; SSE41-NEXT: psadbw %xmm0, %xmm1
15152 ; SSE41-NEXT: packuswb %xmm3, %xmm1
15153 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [27,27,27,27]
15154 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
15157 ; AVX1-LABEL: ult_27_v4i32:
15159 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15160 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
15161 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15162 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15163 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
15164 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
15165 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15166 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15167 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
15168 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15169 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15170 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15171 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15172 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15173 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [27,27,27,27]
15174 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15177 ; AVX2-LABEL: ult_27_v4i32:
15179 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15180 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
15181 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15182 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15183 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
15184 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
15185 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15186 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15187 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
15188 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15189 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15190 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15191 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15192 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15193 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15194 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15197 ; AVX512VPOPCNTDQ-LABEL: ult_27_v4i32:
15198 ; AVX512VPOPCNTDQ: # %bb.0:
15199 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15200 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
15201 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15202 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15203 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
15204 ; AVX512VPOPCNTDQ-NEXT: retq
15206 ; AVX512VPOPCNTDQVL-LABEL: ult_27_v4i32:
15207 ; AVX512VPOPCNTDQVL: # %bb.0:
15208 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
15209 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15210 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15211 ; AVX512VPOPCNTDQVL-NEXT: retq
15213 ; BITALG_NOVLX-LABEL: ult_27_v4i32:
15214 ; BITALG_NOVLX: # %bb.0:
15215 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15216 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
15217 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
15218 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15219 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15220 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15221 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15222 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15223 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15224 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15225 ; BITALG_NOVLX-NEXT: vzeroupper
15226 ; BITALG_NOVLX-NEXT: retq
15228 ; BITALG-LABEL: ult_27_v4i32:
15230 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
15231 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
15232 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15233 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15234 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15235 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15236 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15237 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15238 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15239 ; BITALG-NEXT: retq
; IR under test: ctpop -> icmp ult against splat(27) -> sext of the i1 lanes to i32.
15240 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
15241 %3 = icmp ult <4 x i32> %2, <i32 27, i32 27, i32 27, i32 27>
15242 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_27_v4i32: takes the per-lane population count of a <4 x i32> vector,
; compares each count unsigned-greater-than a splat of 27, and sign-extends the
; <4 x i1> compare result to a <4 x i32> mask.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
; In the checked code for the SSE variants and AVX1 the compare takes its
; constant straight from a constant-pool memory operand; the AVX2 and AVX512
; variants broadcast [27,27,27,27] into a register first.
; NOTE(review): the checked code uses the signed pcmpgtd/vpcmpgtd for an unsigned
; predicate - presumably valid because a 32-bit popcount never exceeds 32; confirm.
15246 define <4 x i32> @ugt_27_v4i32(<4 x i32> %0) {
15247 ; SSE2-LABEL: ugt_27_v4i32:
15249 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15250 ; SSE2-NEXT: psrlw $1, %xmm1
15251 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15252 ; SSE2-NEXT: psubb %xmm1, %xmm0
15253 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15254 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15255 ; SSE2-NEXT: pand %xmm1, %xmm2
15256 ; SSE2-NEXT: psrlw $2, %xmm0
15257 ; SSE2-NEXT: pand %xmm1, %xmm0
15258 ; SSE2-NEXT: paddb %xmm2, %xmm0
15259 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15260 ; SSE2-NEXT: psrlw $4, %xmm1
15261 ; SSE2-NEXT: paddb %xmm1, %xmm0
15262 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15263 ; SSE2-NEXT: pxor %xmm1, %xmm1
15264 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15265 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
15266 ; SSE2-NEXT: psadbw %xmm1, %xmm2
15267 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
15268 ; SSE2-NEXT: psadbw %xmm1, %xmm0
15269 ; SSE2-NEXT: packuswb %xmm2, %xmm0
15270 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15273 ; SSE3-LABEL: ugt_27_v4i32:
15275 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15276 ; SSE3-NEXT: psrlw $1, %xmm1
15277 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15278 ; SSE3-NEXT: psubb %xmm1, %xmm0
15279 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15280 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15281 ; SSE3-NEXT: pand %xmm1, %xmm2
15282 ; SSE3-NEXT: psrlw $2, %xmm0
15283 ; SSE3-NEXT: pand %xmm1, %xmm0
15284 ; SSE3-NEXT: paddb %xmm2, %xmm0
15285 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15286 ; SSE3-NEXT: psrlw $4, %xmm1
15287 ; SSE3-NEXT: paddb %xmm1, %xmm0
15288 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15289 ; SSE3-NEXT: pxor %xmm1, %xmm1
15290 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15291 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
15292 ; SSE3-NEXT: psadbw %xmm1, %xmm2
15293 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
15294 ; SSE3-NEXT: psadbw %xmm1, %xmm0
15295 ; SSE3-NEXT: packuswb %xmm2, %xmm0
15296 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15299 ; SSSE3-LABEL: ugt_27_v4i32:
15301 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15302 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
15303 ; SSSE3-NEXT: pand %xmm2, %xmm3
15304 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15305 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
15306 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
15307 ; SSSE3-NEXT: psrlw $4, %xmm0
15308 ; SSSE3-NEXT: pand %xmm2, %xmm0
15309 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
15310 ; SSSE3-NEXT: paddb %xmm4, %xmm1
15311 ; SSSE3-NEXT: pxor %xmm0, %xmm0
15312 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
15313 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15314 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
15315 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15316 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
15317 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
15318 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15319 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
15322 ; SSE41-LABEL: ugt_27_v4i32:
15324 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15325 ; SSE41-NEXT: movdqa %xmm0, %xmm2
15326 ; SSE41-NEXT: pand %xmm1, %xmm2
15327 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15328 ; SSE41-NEXT: movdqa %xmm3, %xmm4
15329 ; SSE41-NEXT: pshufb %xmm2, %xmm4
15330 ; SSE41-NEXT: psrlw $4, %xmm0
15331 ; SSE41-NEXT: pand %xmm1, %xmm0
15332 ; SSE41-NEXT: pshufb %xmm0, %xmm3
15333 ; SSE41-NEXT: paddb %xmm4, %xmm3
15334 ; SSE41-NEXT: pxor %xmm1, %xmm1
15335 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
15336 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
15337 ; SSE41-NEXT: psadbw %xmm1, %xmm3
15338 ; SSE41-NEXT: psadbw %xmm1, %xmm0
15339 ; SSE41-NEXT: packuswb %xmm3, %xmm0
15340 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15343 ; AVX1-LABEL: ugt_27_v4i32:
15345 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15346 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
15347 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15348 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15349 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
15350 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
15351 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15352 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15353 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
15354 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15355 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15356 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15357 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15358 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15359 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
15362 ; AVX2-LABEL: ugt_27_v4i32:
15364 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15365 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
15366 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15367 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15368 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
15369 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
15370 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15371 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15372 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
15373 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15374 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15375 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15376 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15377 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15378 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15379 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15382 ; AVX512VPOPCNTDQ-LABEL: ugt_27_v4i32:
15383 ; AVX512VPOPCNTDQ: # %bb.0:
15384 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15385 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
15386 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15387 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15388 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
15389 ; AVX512VPOPCNTDQ-NEXT: retq
15391 ; AVX512VPOPCNTDQVL-LABEL: ugt_27_v4i32:
15392 ; AVX512VPOPCNTDQVL: # %bb.0:
15393 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
15394 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15395 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15396 ; AVX512VPOPCNTDQVL-NEXT: retq
15398 ; BITALG_NOVLX-LABEL: ugt_27_v4i32:
15399 ; BITALG_NOVLX: # %bb.0:
15400 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15401 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
15402 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
15403 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15404 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15405 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15406 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15407 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15408 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15409 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15410 ; BITALG_NOVLX-NEXT: vzeroupper
15411 ; BITALG_NOVLX-NEXT: retq
15413 ; BITALG-LABEL: ugt_27_v4i32:
15415 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
15416 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
15417 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15418 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15419 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15420 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15421 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15422 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [27,27,27,27]
15423 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15424 ; BITALG-NEXT: retq
; IR under test: ctpop -> icmp ugt against splat(27) -> sext of the i1 lanes to i32.
15425 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
15426 %3 = icmp ugt <4 x i32> %2, <i32 27, i32 27, i32 27, i32 27>
15427 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_28_v4i32: takes the per-lane population count of a <4 x i32> vector,
; compares each count unsigned-less-than a splat of 28, and sign-extends the
; <4 x i1> compare result to a <4 x i32> mask.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
; In the checked code the constant 28 is materialized in a register and used as
; the first source of the greater-than compare (28 > popcount).
; NOTE(review): the checked code uses the signed pcmpgtd/vpcmpgtd for an unsigned
; predicate - presumably valid because a 32-bit popcount never exceeds 32; confirm.
15431 define <4 x i32> @ult_28_v4i32(<4 x i32> %0) {
15432 ; SSE2-LABEL: ult_28_v4i32:
15434 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15435 ; SSE2-NEXT: psrlw $1, %xmm1
15436 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15437 ; SSE2-NEXT: psubb %xmm1, %xmm0
15438 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15439 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15440 ; SSE2-NEXT: pand %xmm1, %xmm2
15441 ; SSE2-NEXT: psrlw $2, %xmm0
15442 ; SSE2-NEXT: pand %xmm1, %xmm0
15443 ; SSE2-NEXT: paddb %xmm2, %xmm0
15444 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15445 ; SSE2-NEXT: psrlw $4, %xmm1
15446 ; SSE2-NEXT: paddb %xmm0, %xmm1
15447 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15448 ; SSE2-NEXT: pxor %xmm0, %xmm0
15449 ; SSE2-NEXT: movdqa %xmm1, %xmm2
15450 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15451 ; SSE2-NEXT: psadbw %xmm0, %xmm2
15452 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15453 ; SSE2-NEXT: psadbw %xmm0, %xmm1
15454 ; SSE2-NEXT: packuswb %xmm2, %xmm1
15455 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28]
15456 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
15459 ; SSE3-LABEL: ult_28_v4i32:
15461 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15462 ; SSE3-NEXT: psrlw $1, %xmm1
15463 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15464 ; SSE3-NEXT: psubb %xmm1, %xmm0
15465 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15466 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15467 ; SSE3-NEXT: pand %xmm1, %xmm2
15468 ; SSE3-NEXT: psrlw $2, %xmm0
15469 ; SSE3-NEXT: pand %xmm1, %xmm0
15470 ; SSE3-NEXT: paddb %xmm2, %xmm0
15471 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15472 ; SSE3-NEXT: psrlw $4, %xmm1
15473 ; SSE3-NEXT: paddb %xmm0, %xmm1
15474 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15475 ; SSE3-NEXT: pxor %xmm0, %xmm0
15476 ; SSE3-NEXT: movdqa %xmm1, %xmm2
15477 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15478 ; SSE3-NEXT: psadbw %xmm0, %xmm2
15479 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15480 ; SSE3-NEXT: psadbw %xmm0, %xmm1
15481 ; SSE3-NEXT: packuswb %xmm2, %xmm1
15482 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28]
15483 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
15486 ; SSSE3-LABEL: ult_28_v4i32:
15488 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15489 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
15490 ; SSSE3-NEXT: pand %xmm1, %xmm2
15491 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15492 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
15493 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
15494 ; SSSE3-NEXT: psrlw $4, %xmm0
15495 ; SSSE3-NEXT: pand %xmm1, %xmm0
15496 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
15497 ; SSSE3-NEXT: paddb %xmm4, %xmm3
15498 ; SSSE3-NEXT: pxor %xmm0, %xmm0
15499 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
15500 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
15501 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
15502 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
15503 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
15504 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
15505 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28]
15506 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
15509 ; SSE41-LABEL: ult_28_v4i32:
15511 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15512 ; SSE41-NEXT: movdqa %xmm0, %xmm2
15513 ; SSE41-NEXT: pand %xmm1, %xmm2
15514 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15515 ; SSE41-NEXT: movdqa %xmm3, %xmm4
15516 ; SSE41-NEXT: pshufb %xmm2, %xmm4
15517 ; SSE41-NEXT: psrlw $4, %xmm0
15518 ; SSE41-NEXT: pand %xmm1, %xmm0
15519 ; SSE41-NEXT: pshufb %xmm0, %xmm3
15520 ; SSE41-NEXT: paddb %xmm4, %xmm3
15521 ; SSE41-NEXT: pxor %xmm0, %xmm0
15522 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
15523 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
15524 ; SSE41-NEXT: psadbw %xmm0, %xmm3
15525 ; SSE41-NEXT: psadbw %xmm0, %xmm1
15526 ; SSE41-NEXT: packuswb %xmm3, %xmm1
15527 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [28,28,28,28]
15528 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
15531 ; AVX1-LABEL: ult_28_v4i32:
15533 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15534 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
15535 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15536 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15537 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
15538 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
15539 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15540 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15541 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
15542 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15543 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15544 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15545 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15546 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15547 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [28,28,28,28]
15548 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15551 ; AVX2-LABEL: ult_28_v4i32:
15553 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15554 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
15555 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15556 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15557 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
15558 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
15559 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15560 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15561 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
15562 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15563 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15564 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15565 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15566 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15567 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15568 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15571 ; AVX512VPOPCNTDQ-LABEL: ult_28_v4i32:
15572 ; AVX512VPOPCNTDQ: # %bb.0:
15573 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15574 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
15575 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15576 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15577 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
15578 ; AVX512VPOPCNTDQ-NEXT: retq
15580 ; AVX512VPOPCNTDQVL-LABEL: ult_28_v4i32:
15581 ; AVX512VPOPCNTDQVL: # %bb.0:
15582 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
15583 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15584 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15585 ; AVX512VPOPCNTDQVL-NEXT: retq
15587 ; BITALG_NOVLX-LABEL: ult_28_v4i32:
15588 ; BITALG_NOVLX: # %bb.0:
15589 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15590 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
15591 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
15592 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15593 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15594 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15595 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15596 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15597 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15598 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15599 ; BITALG_NOVLX-NEXT: vzeroupper
15600 ; BITALG_NOVLX-NEXT: retq
15602 ; BITALG-LABEL: ult_28_v4i32:
15604 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
15605 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
15606 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15607 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15608 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15609 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15610 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15611 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15612 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15613 ; BITALG-NEXT: retq
; IR under test: ctpop -> icmp ult against splat(28) -> sext of the i1 lanes to i32.
15614 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
15615 %3 = icmp ult <4 x i32> %2, <i32 28, i32 28, i32 28, i32 28>
15616 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_28_v4i32: takes the per-lane population count of a <4 x i32> vector,
; compares each count unsigned-greater-than a splat of 28, and sign-extends the
; <4 x i1> compare result to a <4 x i32> mask.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
; In the checked code for the SSE variants and AVX1 the compare takes its
; constant straight from a constant-pool memory operand; the AVX2 and AVX512
; variants broadcast [28,28,28,28] into a register first.
; NOTE(review): the checked code uses the signed pcmpgtd/vpcmpgtd for an unsigned
; predicate - presumably valid because a 32-bit popcount never exceeds 32; confirm.
15620 define <4 x i32> @ugt_28_v4i32(<4 x i32> %0) {
15621 ; SSE2-LABEL: ugt_28_v4i32:
15623 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15624 ; SSE2-NEXT: psrlw $1, %xmm1
15625 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15626 ; SSE2-NEXT: psubb %xmm1, %xmm0
15627 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15628 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15629 ; SSE2-NEXT: pand %xmm1, %xmm2
15630 ; SSE2-NEXT: psrlw $2, %xmm0
15631 ; SSE2-NEXT: pand %xmm1, %xmm0
15632 ; SSE2-NEXT: paddb %xmm2, %xmm0
15633 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15634 ; SSE2-NEXT: psrlw $4, %xmm1
15635 ; SSE2-NEXT: paddb %xmm1, %xmm0
15636 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15637 ; SSE2-NEXT: pxor %xmm1, %xmm1
15638 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15639 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
15640 ; SSE2-NEXT: psadbw %xmm1, %xmm2
15641 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
15642 ; SSE2-NEXT: psadbw %xmm1, %xmm0
15643 ; SSE2-NEXT: packuswb %xmm2, %xmm0
15644 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15647 ; SSE3-LABEL: ugt_28_v4i32:
15649 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15650 ; SSE3-NEXT: psrlw $1, %xmm1
15651 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15652 ; SSE3-NEXT: psubb %xmm1, %xmm0
15653 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15654 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15655 ; SSE3-NEXT: pand %xmm1, %xmm2
15656 ; SSE3-NEXT: psrlw $2, %xmm0
15657 ; SSE3-NEXT: pand %xmm1, %xmm0
15658 ; SSE3-NEXT: paddb %xmm2, %xmm0
15659 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15660 ; SSE3-NEXT: psrlw $4, %xmm1
15661 ; SSE3-NEXT: paddb %xmm1, %xmm0
15662 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15663 ; SSE3-NEXT: pxor %xmm1, %xmm1
15664 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15665 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
15666 ; SSE3-NEXT: psadbw %xmm1, %xmm2
15667 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
15668 ; SSE3-NEXT: psadbw %xmm1, %xmm0
15669 ; SSE3-NEXT: packuswb %xmm2, %xmm0
15670 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15673 ; SSSE3-LABEL: ugt_28_v4i32:
15675 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15676 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
15677 ; SSSE3-NEXT: pand %xmm2, %xmm3
15678 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15679 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
15680 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
15681 ; SSSE3-NEXT: psrlw $4, %xmm0
15682 ; SSSE3-NEXT: pand %xmm2, %xmm0
15683 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
15684 ; SSSE3-NEXT: paddb %xmm4, %xmm1
15685 ; SSSE3-NEXT: pxor %xmm0, %xmm0
15686 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
15687 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15688 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
15689 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15690 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
15691 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
15692 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15693 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
15696 ; SSE41-LABEL: ugt_28_v4i32:
15698 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15699 ; SSE41-NEXT: movdqa %xmm0, %xmm2
15700 ; SSE41-NEXT: pand %xmm1, %xmm2
15701 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15702 ; SSE41-NEXT: movdqa %xmm3, %xmm4
15703 ; SSE41-NEXT: pshufb %xmm2, %xmm4
15704 ; SSE41-NEXT: psrlw $4, %xmm0
15705 ; SSE41-NEXT: pand %xmm1, %xmm0
15706 ; SSE41-NEXT: pshufb %xmm0, %xmm3
15707 ; SSE41-NEXT: paddb %xmm4, %xmm3
15708 ; SSE41-NEXT: pxor %xmm1, %xmm1
15709 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
15710 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
15711 ; SSE41-NEXT: psadbw %xmm1, %xmm3
15712 ; SSE41-NEXT: psadbw %xmm1, %xmm0
15713 ; SSE41-NEXT: packuswb %xmm3, %xmm0
15714 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
15717 ; AVX1-LABEL: ugt_28_v4i32:
15719 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15720 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
15721 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15722 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15723 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
15724 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
15725 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15726 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15727 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
15728 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15729 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15730 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15731 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15732 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15733 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
15736 ; AVX2-LABEL: ugt_28_v4i32:
15738 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15739 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
15740 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15741 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15742 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
15743 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
15744 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15745 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15746 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
15747 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15748 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15749 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15750 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15751 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15752 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15753 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15756 ; AVX512VPOPCNTDQ-LABEL: ugt_28_v4i32:
15757 ; AVX512VPOPCNTDQ: # %bb.0:
15758 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15759 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
15760 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15761 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15762 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
15763 ; AVX512VPOPCNTDQ-NEXT: retq
15765 ; AVX512VPOPCNTDQVL-LABEL: ugt_28_v4i32:
15766 ; AVX512VPOPCNTDQVL: # %bb.0:
15767 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
15768 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15769 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15770 ; AVX512VPOPCNTDQVL-NEXT: retq
15772 ; BITALG_NOVLX-LABEL: ugt_28_v4i32:
15773 ; BITALG_NOVLX: # %bb.0:
15774 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15775 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
15776 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
15777 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15778 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15779 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15780 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15781 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15782 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15783 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15784 ; BITALG_NOVLX-NEXT: vzeroupper
15785 ; BITALG_NOVLX-NEXT: retq
15787 ; BITALG-LABEL: ugt_28_v4i32:
15789 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
15790 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
15791 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15792 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15793 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15794 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15795 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15796 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [28,28,28,28]
15797 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
15798 ; BITALG-NEXT: retq
; IR under test: ctpop -> icmp ugt against splat(28) -> sext of the i1 lanes to i32.
15799 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
15800 %3 = icmp ugt <4 x i32> %2, <i32 28, i32 28, i32 28, i32 28>
15801 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_29_v4i32: per-lane unsigned test "popcount(x) < 29" on a <4 x i32> input.
; The IR at the end of this block calls @llvm.ctpop.v4i32, compares the counts
; against a splat of 29 with icmp ult, and sign-extends the <4 x i1> mask, so a
; result lane is all-ones when the comparison holds and zero otherwise.
; The prefixed assertion lines are autogenerated (the file header names
; utils/update_llc_test_checks.py); regenerate them instead of editing by hand.
; NOTE(review): the terminating ret of %4 is not visible in this excerpt - confirm.
15805 define <4 x i32> @ult_29_v4i32(<4 x i32> %0) {
15806 ; SSE2-LABEL: ult_29_v4i32:
15808 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15809 ; SSE2-NEXT: psrlw $1, %xmm1
15810 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15811 ; SSE2-NEXT: psubb %xmm1, %xmm0
15812 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15813 ; SSE2-NEXT: movdqa %xmm0, %xmm2
15814 ; SSE2-NEXT: pand %xmm1, %xmm2
15815 ; SSE2-NEXT: psrlw $2, %xmm0
15816 ; SSE2-NEXT: pand %xmm1, %xmm0
15817 ; SSE2-NEXT: paddb %xmm2, %xmm0
15818 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15819 ; SSE2-NEXT: psrlw $4, %xmm1
15820 ; SSE2-NEXT: paddb %xmm0, %xmm1
15821 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15822 ; SSE2-NEXT: pxor %xmm0, %xmm0
15823 ; SSE2-NEXT: movdqa %xmm1, %xmm2
15824 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15825 ; SSE2-NEXT: psadbw %xmm0, %xmm2
15826 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15827 ; SSE2-NEXT: psadbw %xmm0, %xmm1
15828 ; SSE2-NEXT: packuswb %xmm2, %xmm1
15829 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29]
15830 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
15833 ; SSE3-LABEL: ult_29_v4i32:
15835 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15836 ; SSE3-NEXT: psrlw $1, %xmm1
15837 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15838 ; SSE3-NEXT: psubb %xmm1, %xmm0
15839 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
15840 ; SSE3-NEXT: movdqa %xmm0, %xmm2
15841 ; SSE3-NEXT: pand %xmm1, %xmm2
15842 ; SSE3-NEXT: psrlw $2, %xmm0
15843 ; SSE3-NEXT: pand %xmm1, %xmm0
15844 ; SSE3-NEXT: paddb %xmm2, %xmm0
15845 ; SSE3-NEXT: movdqa %xmm0, %xmm1
15846 ; SSE3-NEXT: psrlw $4, %xmm1
15847 ; SSE3-NEXT: paddb %xmm0, %xmm1
15848 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
15849 ; SSE3-NEXT: pxor %xmm0, %xmm0
15850 ; SSE3-NEXT: movdqa %xmm1, %xmm2
15851 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
15852 ; SSE3-NEXT: psadbw %xmm0, %xmm2
15853 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
15854 ; SSE3-NEXT: psadbw %xmm0, %xmm1
15855 ; SSE3-NEXT: packuswb %xmm2, %xmm1
15856 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29]
15857 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
15860 ; SSSE3-LABEL: ult_29_v4i32:
15862 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15863 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
15864 ; SSSE3-NEXT: pand %xmm1, %xmm2
15865 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15866 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
15867 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
15868 ; SSSE3-NEXT: psrlw $4, %xmm0
15869 ; SSSE3-NEXT: pand %xmm1, %xmm0
15870 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
15871 ; SSSE3-NEXT: paddb %xmm4, %xmm3
15872 ; SSSE3-NEXT: pxor %xmm0, %xmm0
15873 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
15874 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
15875 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
15876 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
15877 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
15878 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
15879 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29]
15880 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
15883 ; SSE41-LABEL: ult_29_v4i32:
15885 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15886 ; SSE41-NEXT: movdqa %xmm0, %xmm2
15887 ; SSE41-NEXT: pand %xmm1, %xmm2
15888 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15889 ; SSE41-NEXT: movdqa %xmm3, %xmm4
15890 ; SSE41-NEXT: pshufb %xmm2, %xmm4
15891 ; SSE41-NEXT: psrlw $4, %xmm0
15892 ; SSE41-NEXT: pand %xmm1, %xmm0
15893 ; SSE41-NEXT: pshufb %xmm0, %xmm3
15894 ; SSE41-NEXT: paddb %xmm4, %xmm3
15895 ; SSE41-NEXT: pxor %xmm0, %xmm0
15896 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
15897 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
15898 ; SSE41-NEXT: psadbw %xmm0, %xmm3
15899 ; SSE41-NEXT: psadbw %xmm0, %xmm1
15900 ; SSE41-NEXT: packuswb %xmm3, %xmm1
15901 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [29,29,29,29]
15902 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
15905 ; AVX1-LABEL: ult_29_v4i32:
15907 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15908 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
15909 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15910 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15911 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
15912 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
15913 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15914 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15915 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
15916 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15917 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15918 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15919 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15920 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15921 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [29,29,29,29]
15922 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15925 ; AVX2-LABEL: ult_29_v4i32:
15927 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
15928 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
15929 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
15930 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
15931 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
15932 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
15933 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
15934 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
15935 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
15936 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15937 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15938 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15939 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15940 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15941 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
15942 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15945 ; AVX512VPOPCNTDQ-LABEL: ult_29_v4i32:
15946 ; AVX512VPOPCNTDQ: # %bb.0:
15947 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15948 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
15949 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
15950 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15951 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
15952 ; AVX512VPOPCNTDQ-NEXT: retq
15954 ; AVX512VPOPCNTDQVL-LABEL: ult_29_v4i32:
15955 ; AVX512VPOPCNTDQVL: # %bb.0:
15956 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
15957 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
15958 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15959 ; AVX512VPOPCNTDQVL-NEXT: retq
15961 ; BITALG_NOVLX-LABEL: ult_29_v4i32:
15962 ; BITALG_NOVLX: # %bb.0:
15963 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
15964 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
15965 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
15966 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15967 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15968 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15969 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15970 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15971 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
15972 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15973 ; BITALG_NOVLX-NEXT: vzeroupper
15974 ; BITALG_NOVLX-NEXT: retq
15976 ; BITALG-LABEL: ult_29_v4i32:
15978 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
15979 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
15980 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
15981 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
15982 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
15983 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
15984 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
15985 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
15986 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
15987 ; BITALG-NEXT: retq
; Per-lane population count of the input vector.
15988 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
; Unsigned less-than compare of each count against a splat of 29.
15989 %3 = icmp ult <4 x i32> %2, <i32 29, i32 29, i32 29, i32 29>
; Sign-extend the i1 mask so a true lane becomes all-ones and a false lane zero.
15990 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_29_v4i32: per-lane unsigned test "popcount(x) > 29" on a <4 x i32> input.
; The IR at the end of this block calls @llvm.ctpop.v4i32, compares the counts
; against a splat of 29 with icmp ugt, and sign-extends the <4 x i1> mask, so a
; result lane is all-ones when the comparison holds and zero otherwise.
; The prefixed assertion lines are autogenerated (the file header names
; utils/update_llc_test_checks.py); regenerate them instead of editing by hand.
; NOTE(review): the terminating ret of %4 is not visible in this excerpt - confirm.
15994 define <4 x i32> @ugt_29_v4i32(<4 x i32> %0) {
15995 ; SSE2-LABEL: ugt_29_v4i32:
15997 ; SSE2-NEXT: movdqa %xmm0, %xmm1
15998 ; SSE2-NEXT: psrlw $1, %xmm1
15999 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16000 ; SSE2-NEXT: psubb %xmm1, %xmm0
16001 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16002 ; SSE2-NEXT: movdqa %xmm0, %xmm2
16003 ; SSE2-NEXT: pand %xmm1, %xmm2
16004 ; SSE2-NEXT: psrlw $2, %xmm0
16005 ; SSE2-NEXT: pand %xmm1, %xmm0
16006 ; SSE2-NEXT: paddb %xmm2, %xmm0
16007 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16008 ; SSE2-NEXT: psrlw $4, %xmm1
16009 ; SSE2-NEXT: paddb %xmm1, %xmm0
16010 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16011 ; SSE2-NEXT: pxor %xmm1, %xmm1
16012 ; SSE2-NEXT: movdqa %xmm0, %xmm2
16013 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
16014 ; SSE2-NEXT: psadbw %xmm1, %xmm2
16015 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
16016 ; SSE2-NEXT: psadbw %xmm1, %xmm0
16017 ; SSE2-NEXT: packuswb %xmm2, %xmm0
16018 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16021 ; SSE3-LABEL: ugt_29_v4i32:
16023 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16024 ; SSE3-NEXT: psrlw $1, %xmm1
16025 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16026 ; SSE3-NEXT: psubb %xmm1, %xmm0
16027 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16028 ; SSE3-NEXT: movdqa %xmm0, %xmm2
16029 ; SSE3-NEXT: pand %xmm1, %xmm2
16030 ; SSE3-NEXT: psrlw $2, %xmm0
16031 ; SSE3-NEXT: pand %xmm1, %xmm0
16032 ; SSE3-NEXT: paddb %xmm2, %xmm0
16033 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16034 ; SSE3-NEXT: psrlw $4, %xmm1
16035 ; SSE3-NEXT: paddb %xmm1, %xmm0
16036 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16037 ; SSE3-NEXT: pxor %xmm1, %xmm1
16038 ; SSE3-NEXT: movdqa %xmm0, %xmm2
16039 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
16040 ; SSE3-NEXT: psadbw %xmm1, %xmm2
16041 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
16042 ; SSE3-NEXT: psadbw %xmm1, %xmm0
16043 ; SSE3-NEXT: packuswb %xmm2, %xmm0
16044 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16047 ; SSSE3-LABEL: ugt_29_v4i32:
16049 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16050 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
16051 ; SSSE3-NEXT: pand %xmm2, %xmm3
16052 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16053 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
16054 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
16055 ; SSSE3-NEXT: psrlw $4, %xmm0
16056 ; SSSE3-NEXT: pand %xmm2, %xmm0
16057 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
16058 ; SSSE3-NEXT: paddb %xmm4, %xmm1
16059 ; SSSE3-NEXT: pxor %xmm0, %xmm0
16060 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
16061 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16062 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
16063 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16064 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
16065 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
16066 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16067 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
16070 ; SSE41-LABEL: ugt_29_v4i32:
16072 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16073 ; SSE41-NEXT: movdqa %xmm0, %xmm2
16074 ; SSE41-NEXT: pand %xmm1, %xmm2
16075 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16076 ; SSE41-NEXT: movdqa %xmm3, %xmm4
16077 ; SSE41-NEXT: pshufb %xmm2, %xmm4
16078 ; SSE41-NEXT: psrlw $4, %xmm0
16079 ; SSE41-NEXT: pand %xmm1, %xmm0
16080 ; SSE41-NEXT: pshufb %xmm0, %xmm3
16081 ; SSE41-NEXT: paddb %xmm4, %xmm3
16082 ; SSE41-NEXT: pxor %xmm1, %xmm1
16083 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
16084 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
16085 ; SSE41-NEXT: psadbw %xmm1, %xmm3
16086 ; SSE41-NEXT: psadbw %xmm1, %xmm0
16087 ; SSE41-NEXT: packuswb %xmm3, %xmm0
16088 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16091 ; AVX1-LABEL: ugt_29_v4i32:
16093 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16094 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
16095 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16096 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16097 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
16098 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
16099 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16100 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16101 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
16102 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16103 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16104 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16105 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16106 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16107 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
16110 ; AVX2-LABEL: ugt_29_v4i32:
16112 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16113 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
16114 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16115 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16116 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
16117 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
16118 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16119 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16120 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
16121 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16122 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16123 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16124 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16125 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16126 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
16127 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16130 ; AVX512VPOPCNTDQ-LABEL: ugt_29_v4i32:
16131 ; AVX512VPOPCNTDQ: # %bb.0:
16132 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16133 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
16134 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
16135 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16136 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
16137 ; AVX512VPOPCNTDQ-NEXT: retq
16139 ; AVX512VPOPCNTDQVL-LABEL: ugt_29_v4i32:
16140 ; AVX512VPOPCNTDQVL: # %bb.0:
16141 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
16142 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
16143 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16144 ; AVX512VPOPCNTDQVL-NEXT: retq
16146 ; BITALG_NOVLX-LABEL: ugt_29_v4i32:
16147 ; BITALG_NOVLX: # %bb.0:
16148 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16149 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
16150 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
16151 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16152 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16153 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16154 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16155 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16156 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
16157 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16158 ; BITALG_NOVLX-NEXT: vzeroupper
16159 ; BITALG_NOVLX-NEXT: retq
16161 ; BITALG-LABEL: ugt_29_v4i32:
16163 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
16164 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
16165 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16166 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16167 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16168 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16169 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16170 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [29,29,29,29]
16171 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16172 ; BITALG-NEXT: retq
; Per-lane population count of the input vector.
16173 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
; Unsigned greater-than compare of each count against a splat of 29.
16174 %3 = icmp ugt <4 x i32> %2, <i32 29, i32 29, i32 29, i32 29>
; Sign-extend the i1 mask so a true lane becomes all-ones and a false lane zero.
16175 %4 = sext <4 x i1> %3 to <4 x i32>
; ult_30_v4i32: per-lane unsigned test "popcount(x) < 30" on a <4 x i32> input.
; The IR at the end of this block calls @llvm.ctpop.v4i32, compares the counts
; against a splat of 30 with icmp ult, and sign-extends the <4 x i1> mask, so a
; result lane is all-ones when the comparison holds and zero otherwise.
; The prefixed assertion lines are autogenerated (the file header names
; utils/update_llc_test_checks.py); regenerate them instead of editing by hand.
; NOTE(review): the terminating ret of %4 is not visible in this excerpt - confirm.
16179 define <4 x i32> @ult_30_v4i32(<4 x i32> %0) {
16180 ; SSE2-LABEL: ult_30_v4i32:
16182 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16183 ; SSE2-NEXT: psrlw $1, %xmm1
16184 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16185 ; SSE2-NEXT: psubb %xmm1, %xmm0
16186 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16187 ; SSE2-NEXT: movdqa %xmm0, %xmm2
16188 ; SSE2-NEXT: pand %xmm1, %xmm2
16189 ; SSE2-NEXT: psrlw $2, %xmm0
16190 ; SSE2-NEXT: pand %xmm1, %xmm0
16191 ; SSE2-NEXT: paddb %xmm2, %xmm0
16192 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16193 ; SSE2-NEXT: psrlw $4, %xmm1
16194 ; SSE2-NEXT: paddb %xmm0, %xmm1
16195 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16196 ; SSE2-NEXT: pxor %xmm0, %xmm0
16197 ; SSE2-NEXT: movdqa %xmm1, %xmm2
16198 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16199 ; SSE2-NEXT: psadbw %xmm0, %xmm2
16200 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16201 ; SSE2-NEXT: psadbw %xmm0, %xmm1
16202 ; SSE2-NEXT: packuswb %xmm2, %xmm1
16203 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30]
16204 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
16207 ; SSE3-LABEL: ult_30_v4i32:
16209 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16210 ; SSE3-NEXT: psrlw $1, %xmm1
16211 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16212 ; SSE3-NEXT: psubb %xmm1, %xmm0
16213 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16214 ; SSE3-NEXT: movdqa %xmm0, %xmm2
16215 ; SSE3-NEXT: pand %xmm1, %xmm2
16216 ; SSE3-NEXT: psrlw $2, %xmm0
16217 ; SSE3-NEXT: pand %xmm1, %xmm0
16218 ; SSE3-NEXT: paddb %xmm2, %xmm0
16219 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16220 ; SSE3-NEXT: psrlw $4, %xmm1
16221 ; SSE3-NEXT: paddb %xmm0, %xmm1
16222 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16223 ; SSE3-NEXT: pxor %xmm0, %xmm0
16224 ; SSE3-NEXT: movdqa %xmm1, %xmm2
16225 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16226 ; SSE3-NEXT: psadbw %xmm0, %xmm2
16227 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16228 ; SSE3-NEXT: psadbw %xmm0, %xmm1
16229 ; SSE3-NEXT: packuswb %xmm2, %xmm1
16230 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30]
16231 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
16234 ; SSSE3-LABEL: ult_30_v4i32:
16236 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16237 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
16238 ; SSSE3-NEXT: pand %xmm1, %xmm2
16239 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16240 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
16241 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
16242 ; SSSE3-NEXT: psrlw $4, %xmm0
16243 ; SSSE3-NEXT: pand %xmm1, %xmm0
16244 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
16245 ; SSSE3-NEXT: paddb %xmm4, %xmm3
16246 ; SSSE3-NEXT: pxor %xmm0, %xmm0
16247 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
16248 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
16249 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
16250 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
16251 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
16252 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
16253 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30]
16254 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
16257 ; SSE41-LABEL: ult_30_v4i32:
16259 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16260 ; SSE41-NEXT: movdqa %xmm0, %xmm2
16261 ; SSE41-NEXT: pand %xmm1, %xmm2
16262 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16263 ; SSE41-NEXT: movdqa %xmm3, %xmm4
16264 ; SSE41-NEXT: pshufb %xmm2, %xmm4
16265 ; SSE41-NEXT: psrlw $4, %xmm0
16266 ; SSE41-NEXT: pand %xmm1, %xmm0
16267 ; SSE41-NEXT: pshufb %xmm0, %xmm3
16268 ; SSE41-NEXT: paddb %xmm4, %xmm3
16269 ; SSE41-NEXT: pxor %xmm0, %xmm0
16270 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
16271 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
16272 ; SSE41-NEXT: psadbw %xmm0, %xmm3
16273 ; SSE41-NEXT: psadbw %xmm0, %xmm1
16274 ; SSE41-NEXT: packuswb %xmm3, %xmm1
16275 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [30,30,30,30]
16276 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
16279 ; AVX1-LABEL: ult_30_v4i32:
16281 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16282 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
16283 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16284 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16285 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
16286 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
16287 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16288 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16289 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
16290 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16291 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16292 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16293 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16294 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16295 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [30,30,30,30]
16296 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16299 ; AVX2-LABEL: ult_30_v4i32:
16301 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16302 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
16303 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16304 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16305 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
16306 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
16307 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16308 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16309 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
16310 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16311 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16312 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16313 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16314 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16315 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16316 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16319 ; AVX512VPOPCNTDQ-LABEL: ult_30_v4i32:
16320 ; AVX512VPOPCNTDQ: # %bb.0:
16321 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16322 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
16323 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16324 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16325 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
16326 ; AVX512VPOPCNTDQ-NEXT: retq
16328 ; AVX512VPOPCNTDQVL-LABEL: ult_30_v4i32:
16329 ; AVX512VPOPCNTDQVL: # %bb.0:
16330 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
16331 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16332 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16333 ; AVX512VPOPCNTDQVL-NEXT: retq
16335 ; BITALG_NOVLX-LABEL: ult_30_v4i32:
16336 ; BITALG_NOVLX: # %bb.0:
16337 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16338 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
16339 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
16340 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16341 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16342 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16343 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16344 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16345 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16346 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16347 ; BITALG_NOVLX-NEXT: vzeroupper
16348 ; BITALG_NOVLX-NEXT: retq
16350 ; BITALG-LABEL: ult_30_v4i32:
16352 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
16353 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
16354 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16355 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16356 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16357 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16358 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16359 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16360 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16361 ; BITALG-NEXT: retq
; Per-lane population count of the input vector.
16362 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
; Unsigned less-than compare of each count against a splat of 30.
16363 %3 = icmp ult <4 x i32> %2, <i32 30, i32 30, i32 30, i32 30>
; Sign-extend the i1 mask so a true lane becomes all-ones and a false lane zero.
16364 %4 = sext <4 x i1> %3 to <4 x i32>
; ugt_30_v4i32: per-lane unsigned test "popcount(x) > 30" on a <4 x i32> input.
; The IR at the end of this block calls @llvm.ctpop.v4i32, compares the counts
; against a splat of 30 with icmp ugt, and sign-extends the <4 x i1> mask, so a
; result lane is all-ones when the comparison holds and zero otherwise.
; The prefixed assertion lines are autogenerated (the file header names
; utils/update_llc_test_checks.py); regenerate them instead of editing by hand.
; NOTE(review): the terminating ret of %4 is not visible in this excerpt - confirm.
16368 define <4 x i32> @ugt_30_v4i32(<4 x i32> %0) {
16369 ; SSE2-LABEL: ugt_30_v4i32:
16371 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16372 ; SSE2-NEXT: psrlw $1, %xmm1
16373 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16374 ; SSE2-NEXT: psubb %xmm1, %xmm0
16375 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16376 ; SSE2-NEXT: movdqa %xmm0, %xmm2
16377 ; SSE2-NEXT: pand %xmm1, %xmm2
16378 ; SSE2-NEXT: psrlw $2, %xmm0
16379 ; SSE2-NEXT: pand %xmm1, %xmm0
16380 ; SSE2-NEXT: paddb %xmm2, %xmm0
16381 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16382 ; SSE2-NEXT: psrlw $4, %xmm1
16383 ; SSE2-NEXT: paddb %xmm1, %xmm0
16384 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16385 ; SSE2-NEXT: pxor %xmm1, %xmm1
16386 ; SSE2-NEXT: movdqa %xmm0, %xmm2
16387 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
16388 ; SSE2-NEXT: psadbw %xmm1, %xmm2
16389 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
16390 ; SSE2-NEXT: psadbw %xmm1, %xmm0
16391 ; SSE2-NEXT: packuswb %xmm2, %xmm0
16392 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16395 ; SSE3-LABEL: ugt_30_v4i32:
16397 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16398 ; SSE3-NEXT: psrlw $1, %xmm1
16399 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16400 ; SSE3-NEXT: psubb %xmm1, %xmm0
16401 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16402 ; SSE3-NEXT: movdqa %xmm0, %xmm2
16403 ; SSE3-NEXT: pand %xmm1, %xmm2
16404 ; SSE3-NEXT: psrlw $2, %xmm0
16405 ; SSE3-NEXT: pand %xmm1, %xmm0
16406 ; SSE3-NEXT: paddb %xmm2, %xmm0
16407 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16408 ; SSE3-NEXT: psrlw $4, %xmm1
16409 ; SSE3-NEXT: paddb %xmm1, %xmm0
16410 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16411 ; SSE3-NEXT: pxor %xmm1, %xmm1
16412 ; SSE3-NEXT: movdqa %xmm0, %xmm2
16413 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
16414 ; SSE3-NEXT: psadbw %xmm1, %xmm2
16415 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
16416 ; SSE3-NEXT: psadbw %xmm1, %xmm0
16417 ; SSE3-NEXT: packuswb %xmm2, %xmm0
16418 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16421 ; SSSE3-LABEL: ugt_30_v4i32:
16423 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16424 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
16425 ; SSSE3-NEXT: pand %xmm2, %xmm3
16426 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16427 ; SSSE3-NEXT: movdqa %xmm1, %xmm4
16428 ; SSSE3-NEXT: pshufb %xmm3, %xmm4
16429 ; SSSE3-NEXT: psrlw $4, %xmm0
16430 ; SSSE3-NEXT: pand %xmm2, %xmm0
16431 ; SSSE3-NEXT: pshufb %xmm0, %xmm1
16432 ; SSSE3-NEXT: paddb %xmm4, %xmm1
16433 ; SSSE3-NEXT: pxor %xmm0, %xmm0
16434 ; SSSE3-NEXT: movdqa %xmm1, %xmm2
16435 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16436 ; SSSE3-NEXT: psadbw %xmm0, %xmm2
16437 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16438 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
16439 ; SSSE3-NEXT: packuswb %xmm2, %xmm1
16440 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16441 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
16444 ; SSE41-LABEL: ugt_30_v4i32:
16446 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16447 ; SSE41-NEXT: movdqa %xmm0, %xmm2
16448 ; SSE41-NEXT: pand %xmm1, %xmm2
16449 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16450 ; SSE41-NEXT: movdqa %xmm3, %xmm4
16451 ; SSE41-NEXT: pshufb %xmm2, %xmm4
16452 ; SSE41-NEXT: psrlw $4, %xmm0
16453 ; SSE41-NEXT: pand %xmm1, %xmm0
16454 ; SSE41-NEXT: pshufb %xmm0, %xmm3
16455 ; SSE41-NEXT: paddb %xmm4, %xmm3
16456 ; SSE41-NEXT: pxor %xmm1, %xmm1
16457 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero
16458 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
16459 ; SSE41-NEXT: psadbw %xmm1, %xmm3
16460 ; SSE41-NEXT: psadbw %xmm1, %xmm0
16461 ; SSE41-NEXT: packuswb %xmm3, %xmm0
16462 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16465 ; AVX1-LABEL: ugt_30_v4i32:
16467 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16468 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
16469 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16470 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16471 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
16472 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
16473 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16474 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16475 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
16476 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16477 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16478 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16479 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16480 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16481 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
16484 ; AVX2-LABEL: ugt_30_v4i32:
16486 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16487 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
16488 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16489 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16490 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
16491 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
16492 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16493 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16494 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
16495 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16496 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16497 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16498 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16499 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16500 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16501 ; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16504 ; AVX512VPOPCNTDQ-LABEL: ugt_30_v4i32:
16505 ; AVX512VPOPCNTDQ: # %bb.0:
16506 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16507 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
16508 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16509 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16510 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
16511 ; AVX512VPOPCNTDQ-NEXT: retq
16513 ; AVX512VPOPCNTDQVL-LABEL: ugt_30_v4i32:
16514 ; AVX512VPOPCNTDQVL: # %bb.0:
16515 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
16516 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16517 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16518 ; AVX512VPOPCNTDQVL-NEXT: retq
16520 ; BITALG_NOVLX-LABEL: ugt_30_v4i32:
16521 ; BITALG_NOVLX: # %bb.0:
16522 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16523 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
16524 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
16525 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16526 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16527 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16528 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16529 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16530 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16531 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16532 ; BITALG_NOVLX-NEXT: vzeroupper
16533 ; BITALG_NOVLX-NEXT: retq
16535 ; BITALG-LABEL: ugt_30_v4i32:
16537 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
16538 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
16539 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16540 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16541 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16542 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16543 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16544 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [30,30,30,30]
16545 ; BITALG-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
16546 ; BITALG-NEXT: retq
; Per-lane population count of the input vector.
16547 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
; Unsigned greater-than compare of each count against a splat of 30.
16548 %3 = icmp ugt <4 x i32> %2, <i32 30, i32 30, i32 30, i32 30>
; Sign-extend the i1 mask so a true lane becomes all-ones and a false lane zero.
16549 %4 = sext <4 x i1> %3 to <4 x i32>
; Codegen test for a per-lane "ctpop(%0) u< 31" on <4 x i32>, with the
; <4 x i1> compare result sign-extended to <4 x i32> (%4).
; In the expected code below, the runs without avx512vpopcntdq first build a
; per-byte population count (a shift/mask/add sequence on the two oldest SSE
; levels, a pshufb nibble lookup table from SSSE3 upwards, vpopcntb when
; avx512bitalg is enabled), widen it to per-dword counts with psadbw against
; zero plus packuswb, and then compare against a splat of 31 using pcmpgtd.
; The avx512vpopcntdq runs obtain the per-dword count directly from vpopcntd.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
16553 define <4 x i32> @ult_31_v4i32(<4 x i32> %0) {
16554 ; SSE2-LABEL: ult_31_v4i32:
16556 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16557 ; SSE2-NEXT: psrlw $1, %xmm1
16558 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16559 ; SSE2-NEXT: psubb %xmm1, %xmm0
16560 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16561 ; SSE2-NEXT: movdqa %xmm0, %xmm2
16562 ; SSE2-NEXT: pand %xmm1, %xmm2
16563 ; SSE2-NEXT: psrlw $2, %xmm0
16564 ; SSE2-NEXT: pand %xmm1, %xmm0
16565 ; SSE2-NEXT: paddb %xmm2, %xmm0
16566 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16567 ; SSE2-NEXT: psrlw $4, %xmm1
16568 ; SSE2-NEXT: paddb %xmm0, %xmm1
16569 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16570 ; SSE2-NEXT: pxor %xmm0, %xmm0
16571 ; SSE2-NEXT: movdqa %xmm1, %xmm2
16572 ; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16573 ; SSE2-NEXT: psadbw %xmm0, %xmm2
16574 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16575 ; SSE2-NEXT: psadbw %xmm0, %xmm1
16576 ; SSE2-NEXT: packuswb %xmm2, %xmm1
16577 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31]
16578 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
16581 ; SSE3-LABEL: ult_31_v4i32:
16583 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16584 ; SSE3-NEXT: psrlw $1, %xmm1
16585 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16586 ; SSE3-NEXT: psubb %xmm1, %xmm0
16587 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16588 ; SSE3-NEXT: movdqa %xmm0, %xmm2
16589 ; SSE3-NEXT: pand %xmm1, %xmm2
16590 ; SSE3-NEXT: psrlw $2, %xmm0
16591 ; SSE3-NEXT: pand %xmm1, %xmm0
16592 ; SSE3-NEXT: paddb %xmm2, %xmm0
16593 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16594 ; SSE3-NEXT: psrlw $4, %xmm1
16595 ; SSE3-NEXT: paddb %xmm0, %xmm1
16596 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16597 ; SSE3-NEXT: pxor %xmm0, %xmm0
16598 ; SSE3-NEXT: movdqa %xmm1, %xmm2
16599 ; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
16600 ; SSE3-NEXT: psadbw %xmm0, %xmm2
16601 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
16602 ; SSE3-NEXT: psadbw %xmm0, %xmm1
16603 ; SSE3-NEXT: packuswb %xmm2, %xmm1
16604 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31]
16605 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
16608 ; SSSE3-LABEL: ult_31_v4i32:
16610 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16611 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
16612 ; SSSE3-NEXT: pand %xmm1, %xmm2
16613 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16614 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
16615 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
16616 ; SSSE3-NEXT: psrlw $4, %xmm0
16617 ; SSSE3-NEXT: pand %xmm1, %xmm0
16618 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
16619 ; SSSE3-NEXT: paddb %xmm4, %xmm3
16620 ; SSSE3-NEXT: pxor %xmm0, %xmm0
16621 ; SSSE3-NEXT: movdqa %xmm3, %xmm1
16622 ; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
16623 ; SSSE3-NEXT: psadbw %xmm0, %xmm1
16624 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
16625 ; SSSE3-NEXT: psadbw %xmm0, %xmm3
16626 ; SSSE3-NEXT: packuswb %xmm1, %xmm3
16627 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31]
16628 ; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
16631 ; SSE41-LABEL: ult_31_v4i32:
16633 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16634 ; SSE41-NEXT: movdqa %xmm0, %xmm2
16635 ; SSE41-NEXT: pand %xmm1, %xmm2
16636 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16637 ; SSE41-NEXT: movdqa %xmm3, %xmm4
16638 ; SSE41-NEXT: pshufb %xmm2, %xmm4
16639 ; SSE41-NEXT: psrlw $4, %xmm0
16640 ; SSE41-NEXT: pand %xmm1, %xmm0
16641 ; SSE41-NEXT: pshufb %xmm0, %xmm3
16642 ; SSE41-NEXT: paddb %xmm4, %xmm3
16643 ; SSE41-NEXT: pxor %xmm0, %xmm0
16644 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero
16645 ; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
16646 ; SSE41-NEXT: psadbw %xmm0, %xmm3
16647 ; SSE41-NEXT: psadbw %xmm0, %xmm1
16648 ; SSE41-NEXT: packuswb %xmm3, %xmm1
16649 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [31,31,31,31]
16650 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
16653 ; AVX1-LABEL: ult_31_v4i32:
16655 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16656 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
16657 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16658 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16659 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
16660 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
16661 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16662 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16663 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
16664 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16665 ; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16666 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16667 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16668 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16669 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [31,31,31,31]
16670 ; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16673 ; AVX2-LABEL: ult_31_v4i32:
16675 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
16676 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
16677 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
16678 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
16679 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
16680 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
16681 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
16682 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
16683 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
16684 ; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16685 ; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16686 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16687 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16688 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16689 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31]
16690 ; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16693 ; AVX512VPOPCNTDQ-LABEL: ult_31_v4i32:
16694 ; AVX512VPOPCNTDQ: # %bb.0:
16695 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16696 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
16697 ; AVX512VPOPCNTDQ-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31]
16698 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16699 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
16700 ; AVX512VPOPCNTDQ-NEXT: retq
16702 ; AVX512VPOPCNTDQVL-LABEL: ult_31_v4i32:
16703 ; AVX512VPOPCNTDQVL: # %bb.0:
16704 ; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
16705 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31]
16706 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16707 ; AVX512VPOPCNTDQVL-NEXT: retq
16709 ; BITALG_NOVLX-LABEL: ult_31_v4i32:
16710 ; BITALG_NOVLX: # %bb.0:
16711 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16712 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
16713 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
16714 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16715 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16716 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16717 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16718 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16719 ; BITALG_NOVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31]
16720 ; BITALG_NOVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16721 ; BITALG_NOVLX-NEXT: vzeroupper
16722 ; BITALG_NOVLX-NEXT: retq
16724 ; BITALG-LABEL: ult_31_v4i32:
16726 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
16727 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
16728 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
16729 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
16730 ; BITALG-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
16731 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
16732 ; BITALG-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
16733 ; BITALG-NEXT: vpbroadcastd {{.*#+}} xmm1 = [31,31,31,31]
16734 ; BITALG-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
16735 ; BITALG-NEXT: retq
16736 %2 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
16737 %3 = icmp ult <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
16738 %4 = sext <4 x i1> %3 to <4 x i32>
; Codegen test for a per-lane "ctpop(%0) u> 1" on <2 x i64>, with the
; <2 x i1> compare result sign-extended to <2 x i64> (%4).
; Only the two avx512vpopcntdq runs are expected to emit a population count
; (vpopcntq followed by vpcmpgtq). Every other run is expected to form
; %0 & (%0 + all-ones), compare that with zero and invert the outcome (pxor
; with all-ones, or vpternlogq $15 on the avx512bitalg runs). The runs below
; SSE4.1 assemble the 64-bit equality from pcmpeqd + pshufd + pand; later
; ones use pcmpeqq directly.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
16742 define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) {
16743 ; SSE2-LABEL: ugt_1_v2i64:
16745 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
16746 ; SSE2-NEXT: movdqa %xmm0, %xmm2
16747 ; SSE2-NEXT: paddq %xmm1, %xmm2
16748 ; SSE2-NEXT: pand %xmm2, %xmm0
16749 ; SSE2-NEXT: pxor %xmm2, %xmm2
16750 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
16751 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
16752 ; SSE2-NEXT: pand %xmm2, %xmm0
16753 ; SSE2-NEXT: pxor %xmm1, %xmm0
16756 ; SSE3-LABEL: ugt_1_v2i64:
16758 ; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
16759 ; SSE3-NEXT: movdqa %xmm0, %xmm2
16760 ; SSE3-NEXT: paddq %xmm1, %xmm2
16761 ; SSE3-NEXT: pand %xmm2, %xmm0
16762 ; SSE3-NEXT: pxor %xmm2, %xmm2
16763 ; SSE3-NEXT: pcmpeqd %xmm2, %xmm0
16764 ; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
16765 ; SSE3-NEXT: pand %xmm2, %xmm0
16766 ; SSE3-NEXT: pxor %xmm1, %xmm0
16769 ; SSSE3-LABEL: ugt_1_v2i64:
16771 ; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
16772 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
16773 ; SSSE3-NEXT: paddq %xmm1, %xmm2
16774 ; SSSE3-NEXT: pand %xmm2, %xmm0
16775 ; SSSE3-NEXT: pxor %xmm2, %xmm2
16776 ; SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
16777 ; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
16778 ; SSSE3-NEXT: pand %xmm2, %xmm0
16779 ; SSSE3-NEXT: pxor %xmm1, %xmm0
16782 ; SSE41-LABEL: ugt_1_v2i64:
16784 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
16785 ; SSE41-NEXT: movdqa %xmm0, %xmm2
16786 ; SSE41-NEXT: paddq %xmm1, %xmm2
16787 ; SSE41-NEXT: pand %xmm2, %xmm0
16788 ; SSE41-NEXT: pxor %xmm2, %xmm2
16789 ; SSE41-NEXT: pcmpeqq %xmm2, %xmm0
16790 ; SSE41-NEXT: pxor %xmm1, %xmm0
16793 ; AVX1-LABEL: ugt_1_v2i64:
16795 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
16796 ; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm2
16797 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
16798 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
16799 ; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
16800 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
16803 ; AVX2-LABEL: ugt_1_v2i64:
16805 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
16806 ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm2
16807 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
16808 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
16809 ; AVX2-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
16810 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
16813 ; AVX512VPOPCNTDQ-LABEL: ugt_1_v2i64:
16814 ; AVX512VPOPCNTDQ: # %bb.0:
16815 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16816 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
16817 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
16818 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
16819 ; AVX512VPOPCNTDQ-NEXT: retq
16821 ; AVX512VPOPCNTDQVL-LABEL: ugt_1_v2i64:
16822 ; AVX512VPOPCNTDQVL: # %bb.0:
16823 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
16824 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,1]
16825 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
16826 ; AVX512VPOPCNTDQVL-NEXT: retq
16828 ; BITALG_NOVLX-LABEL: ugt_1_v2i64:
16829 ; BITALG_NOVLX: # %bb.0:
16830 ; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
16831 ; BITALG_NOVLX-NEXT: vpaddq %xmm1, %xmm0, %xmm1
16832 ; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
16833 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
16834 ; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
16835 ; BITALG_NOVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
16836 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
16837 ; BITALG_NOVLX-NEXT: vzeroupper
16838 ; BITALG_NOVLX-NEXT: retq
16840 ; BITALG-LABEL: ugt_1_v2i64:
16842 ; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
16843 ; BITALG-NEXT: vpaddq %xmm1, %xmm0, %xmm1
16844 ; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
16845 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
16846 ; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
16847 ; BITALG-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
16848 ; BITALG-NEXT: retq
16849 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
16850 %3 = icmp ugt <2 x i64> %2, <i64 1, i64 1>
16851 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test for a per-lane "ctpop(%0) u< 2" on <2 x i64>, with the
; <2 x i1> compare result sign-extended to <2 x i64> (%4).
; Only the two avx512vpopcntdq runs are expected to emit a population count
; (vpopcntq, then vpcmpgtq with a splat of 2 as the greater-than operand).
; Every other run is expected to form %0 & (%0 + all-ones) and compare it
; with zero, with no trailing inversion. The runs below SSE4.1 assemble the
; 64-bit equality from pcmpeqd + pshufd + pand; later ones use pcmpeqq.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
16855 define <2 x i64> @ult_2_v2i64(<2 x i64> %0) {
16856 ; SSE2-LABEL: ult_2_v2i64:
16858 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
16859 ; SSE2-NEXT: paddq %xmm0, %xmm1
16860 ; SSE2-NEXT: pand %xmm1, %xmm0
16861 ; SSE2-NEXT: pxor %xmm1, %xmm1
16862 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
16863 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
16864 ; SSE2-NEXT: pand %xmm1, %xmm0
16867 ; SSE3-LABEL: ult_2_v2i64:
16869 ; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
16870 ; SSE3-NEXT: paddq %xmm0, %xmm1
16871 ; SSE3-NEXT: pand %xmm1, %xmm0
16872 ; SSE3-NEXT: pxor %xmm1, %xmm1
16873 ; SSE3-NEXT: pcmpeqd %xmm1, %xmm0
16874 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
16875 ; SSE3-NEXT: pand %xmm1, %xmm0
16878 ; SSSE3-LABEL: ult_2_v2i64:
16880 ; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
16881 ; SSSE3-NEXT: paddq %xmm0, %xmm1
16882 ; SSSE3-NEXT: pand %xmm1, %xmm0
16883 ; SSSE3-NEXT: pxor %xmm1, %xmm1
16884 ; SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
16885 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
16886 ; SSSE3-NEXT: pand %xmm1, %xmm0
16889 ; SSE41-LABEL: ult_2_v2i64:
16891 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
16892 ; SSE41-NEXT: paddq %xmm0, %xmm1
16893 ; SSE41-NEXT: pand %xmm1, %xmm0
16894 ; SSE41-NEXT: pxor %xmm1, %xmm1
16895 ; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
16898 ; AVX1-LABEL: ult_2_v2i64:
16900 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
16901 ; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm1
16902 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
16903 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
16904 ; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
16907 ; AVX2-LABEL: ult_2_v2i64:
16909 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
16910 ; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm1
16911 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
16912 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
16913 ; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
16916 ; AVX512VPOPCNTDQ-LABEL: ult_2_v2i64:
16917 ; AVX512VPOPCNTDQ: # %bb.0:
16918 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
16919 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
16920 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [2,2]
16921 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
16922 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
16923 ; AVX512VPOPCNTDQ-NEXT: retq
16925 ; AVX512VPOPCNTDQVL-LABEL: ult_2_v2i64:
16926 ; AVX512VPOPCNTDQVL: # %bb.0:
16927 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
16928 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2]
16929 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
16930 ; AVX512VPOPCNTDQVL-NEXT: retq
16932 ; BITALG_NOVLX-LABEL: ult_2_v2i64:
16933 ; BITALG_NOVLX: # %bb.0:
16934 ; BITALG_NOVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
16935 ; BITALG_NOVLX-NEXT: vpaddq %xmm1, %xmm0, %xmm1
16936 ; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
16937 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
16938 ; BITALG_NOVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
16939 ; BITALG_NOVLX-NEXT: retq
16941 ; BITALG-LABEL: ult_2_v2i64:
16943 ; BITALG-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
16944 ; BITALG-NEXT: vpaddq %xmm1, %xmm0, %xmm1
16945 ; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
16946 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
16947 ; BITALG-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
16948 ; BITALG-NEXT: retq
16949 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
16950 %3 = icmp ult <2 x i64> %2, <i64 2, i64 2>
16951 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test for a per-lane "ctpop(%0) u> 2" on <2 x i64>, with the
; <2 x i1> compare result sign-extended to <2 x i64> (%4).
; In the expected code below, the runs without avx512vpopcntdq first build a
; per-byte population count (a shift/mask/add sequence on the two oldest SSE
; levels, a pshufb nibble lookup table from SSSE3 upwards, vpopcntb when
; avx512bitalg is enabled) and reduce it per qword with psadbw against zero.
; The pre-AVX runs then use pshufd [0,0,2,2] and a pcmpgtd against a
; constant-pool operand; the AVX and later runs use vpcmpgtq.
; The avx512vpopcntdq runs obtain the per-qword count directly from vpopcntq.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
16955 define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) {
16956 ; SSE2-LABEL: ugt_2_v2i64:
16958 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16959 ; SSE2-NEXT: psrlw $1, %xmm1
16960 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16961 ; SSE2-NEXT: psubb %xmm1, %xmm0
16962 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16963 ; SSE2-NEXT: movdqa %xmm0, %xmm2
16964 ; SSE2-NEXT: pand %xmm1, %xmm2
16965 ; SSE2-NEXT: psrlw $2, %xmm0
16966 ; SSE2-NEXT: pand %xmm1, %xmm0
16967 ; SSE2-NEXT: paddb %xmm2, %xmm0
16968 ; SSE2-NEXT: movdqa %xmm0, %xmm1
16969 ; SSE2-NEXT: psrlw $4, %xmm1
16970 ; SSE2-NEXT: paddb %xmm0, %xmm1
16971 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16972 ; SSE2-NEXT: pxor %xmm0, %xmm0
16973 ; SSE2-NEXT: psadbw %xmm1, %xmm0
16974 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
16975 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16978 ; SSE3-LABEL: ugt_2_v2i64:
16980 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16981 ; SSE3-NEXT: psrlw $1, %xmm1
16982 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16983 ; SSE3-NEXT: psubb %xmm1, %xmm0
16984 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
16985 ; SSE3-NEXT: movdqa %xmm0, %xmm2
16986 ; SSE3-NEXT: pand %xmm1, %xmm2
16987 ; SSE3-NEXT: psrlw $2, %xmm0
16988 ; SSE3-NEXT: pand %xmm1, %xmm0
16989 ; SSE3-NEXT: paddb %xmm2, %xmm0
16990 ; SSE3-NEXT: movdqa %xmm0, %xmm1
16991 ; SSE3-NEXT: psrlw $4, %xmm1
16992 ; SSE3-NEXT: paddb %xmm0, %xmm1
16993 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
16994 ; SSE3-NEXT: pxor %xmm0, %xmm0
16995 ; SSE3-NEXT: psadbw %xmm1, %xmm0
16996 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
16997 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
17000 ; SSSE3-LABEL: ugt_2_v2i64:
17002 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17003 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
17004 ; SSSE3-NEXT: pand %xmm1, %xmm2
17005 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17006 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
17007 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
17008 ; SSSE3-NEXT: psrlw $4, %xmm0
17009 ; SSSE3-NEXT: pand %xmm1, %xmm0
17010 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
17011 ; SSSE3-NEXT: paddb %xmm4, %xmm3
17012 ; SSSE3-NEXT: pxor %xmm0, %xmm0
17013 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
17014 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
17015 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
17018 ; SSE41-LABEL: ugt_2_v2i64:
17020 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17021 ; SSE41-NEXT: movdqa %xmm0, %xmm2
17022 ; SSE41-NEXT: pand %xmm1, %xmm2
17023 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17024 ; SSE41-NEXT: movdqa %xmm3, %xmm4
17025 ; SSE41-NEXT: pshufb %xmm2, %xmm4
17026 ; SSE41-NEXT: psrlw $4, %xmm0
17027 ; SSE41-NEXT: pand %xmm1, %xmm0
17028 ; SSE41-NEXT: pshufb %xmm0, %xmm3
17029 ; SSE41-NEXT: paddb %xmm4, %xmm3
17030 ; SSE41-NEXT: pxor %xmm0, %xmm0
17031 ; SSE41-NEXT: psadbw %xmm3, %xmm0
17032 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
17033 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
17036 ; AVX1-LABEL: ugt_2_v2i64:
17038 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17039 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
17040 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17041 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17042 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
17043 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
17044 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17045 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17046 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
17047 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17048 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17051 ; AVX2-LABEL: ugt_2_v2i64:
17053 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17054 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
17055 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17056 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17057 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
17058 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
17059 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17060 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17061 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
17062 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17063 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17066 ; AVX512VPOPCNTDQ-LABEL: ugt_2_v2i64:
17067 ; AVX512VPOPCNTDQ: # %bb.0:
17068 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17069 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
17070 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17071 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
17072 ; AVX512VPOPCNTDQ-NEXT: retq
17074 ; AVX512VPOPCNTDQVL-LABEL: ugt_2_v2i64:
17075 ; AVX512VPOPCNTDQVL: # %bb.0:
17076 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
17077 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2]
17078 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
17079 ; AVX512VPOPCNTDQVL-NEXT: retq
17081 ; BITALG_NOVLX-LABEL: ugt_2_v2i64:
17082 ; BITALG_NOVLX: # %bb.0:
17083 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17084 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
17085 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
17086 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17087 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17088 ; BITALG_NOVLX-NEXT: vzeroupper
17089 ; BITALG_NOVLX-NEXT: retq
17091 ; BITALG-LABEL: ugt_2_v2i64:
17093 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
17094 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
17095 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17096 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2]
17097 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
17098 ; BITALG-NEXT: retq
17099 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
17100 %3 = icmp ugt <2 x i64> %2, <i64 2, i64 2>
17101 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test for a per-lane "ctpop(%0) u< 3" on <2 x i64>, with the
; <2 x i1> compare result sign-extended to <2 x i64> (%4).
; In the expected code below, the runs without avx512vpopcntdq first build a
; per-byte population count (a shift/mask/add sequence on the two oldest SSE
; levels, a pshufb nibble lookup table from SSSE3 upwards, vpopcntb when
; avx512bitalg is enabled) and reduce it per qword with psadbw against zero.
; The pre-AVX runs then use pshufd [0,0,2,2] and pcmpgtd with a splat of 3
; as the greater-than operand; the AVX and later runs use vpcmpgtq with [3,3].
; The avx512vpopcntdq runs obtain the per-qword count directly from vpopcntq.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
17105 define <2 x i64> @ult_3_v2i64(<2 x i64> %0) {
17106 ; SSE2-LABEL: ult_3_v2i64:
17108 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17109 ; SSE2-NEXT: psrlw $1, %xmm1
17110 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17111 ; SSE2-NEXT: psubb %xmm1, %xmm0
17112 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17113 ; SSE2-NEXT: movdqa %xmm0, %xmm2
17114 ; SSE2-NEXT: pand %xmm1, %xmm2
17115 ; SSE2-NEXT: psrlw $2, %xmm0
17116 ; SSE2-NEXT: pand %xmm1, %xmm0
17117 ; SSE2-NEXT: paddb %xmm2, %xmm0
17118 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17119 ; SSE2-NEXT: psrlw $4, %xmm1
17120 ; SSE2-NEXT: paddb %xmm0, %xmm1
17121 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17122 ; SSE2-NEXT: pxor %xmm0, %xmm0
17123 ; SSE2-NEXT: psadbw %xmm1, %xmm0
17124 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17125 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3]
17126 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
17129 ; SSE3-LABEL: ult_3_v2i64:
17131 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17132 ; SSE3-NEXT: psrlw $1, %xmm1
17133 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17134 ; SSE3-NEXT: psubb %xmm1, %xmm0
17135 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17136 ; SSE3-NEXT: movdqa %xmm0, %xmm2
17137 ; SSE3-NEXT: pand %xmm1, %xmm2
17138 ; SSE3-NEXT: psrlw $2, %xmm0
17139 ; SSE3-NEXT: pand %xmm1, %xmm0
17140 ; SSE3-NEXT: paddb %xmm2, %xmm0
17141 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17142 ; SSE3-NEXT: psrlw $4, %xmm1
17143 ; SSE3-NEXT: paddb %xmm0, %xmm1
17144 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17145 ; SSE3-NEXT: pxor %xmm0, %xmm0
17146 ; SSE3-NEXT: psadbw %xmm1, %xmm0
17147 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17148 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3]
17149 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
17152 ; SSSE3-LABEL: ult_3_v2i64:
17154 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17155 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
17156 ; SSSE3-NEXT: pand %xmm1, %xmm2
17157 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17158 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
17159 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
17160 ; SSSE3-NEXT: psrlw $4, %xmm0
17161 ; SSSE3-NEXT: pand %xmm1, %xmm0
17162 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
17163 ; SSSE3-NEXT: paddb %xmm4, %xmm3
17164 ; SSSE3-NEXT: pxor %xmm0, %xmm0
17165 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
17166 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17167 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3]
17168 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
17171 ; SSE41-LABEL: ult_3_v2i64:
17173 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17174 ; SSE41-NEXT: movdqa %xmm0, %xmm2
17175 ; SSE41-NEXT: pand %xmm1, %xmm2
17176 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17177 ; SSE41-NEXT: movdqa %xmm3, %xmm4
17178 ; SSE41-NEXT: pshufb %xmm2, %xmm4
17179 ; SSE41-NEXT: psrlw $4, %xmm0
17180 ; SSE41-NEXT: pand %xmm1, %xmm0
17181 ; SSE41-NEXT: pshufb %xmm0, %xmm3
17182 ; SSE41-NEXT: paddb %xmm4, %xmm3
17183 ; SSE41-NEXT: pxor %xmm0, %xmm0
17184 ; SSE41-NEXT: psadbw %xmm3, %xmm0
17185 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17186 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [3,3,3,3]
17187 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
17190 ; AVX1-LABEL: ult_3_v2i64:
17192 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17193 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
17194 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17195 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17196 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
17197 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
17198 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17199 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17200 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
17201 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17202 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [3,3]
17203 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17206 ; AVX2-LABEL: ult_3_v2i64:
17208 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17209 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
17210 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17211 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17212 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
17213 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
17214 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17215 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17216 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
17217 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17218 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [3,3]
17219 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17222 ; AVX512VPOPCNTDQ-LABEL: ult_3_v2i64:
17223 ; AVX512VPOPCNTDQ: # %bb.0:
17224 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17225 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
17226 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [3,3]
17227 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17228 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
17229 ; AVX512VPOPCNTDQ-NEXT: retq
17231 ; AVX512VPOPCNTDQVL-LABEL: ult_3_v2i64:
17232 ; AVX512VPOPCNTDQVL: # %bb.0:
17233 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
17234 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3]
17235 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17236 ; AVX512VPOPCNTDQVL-NEXT: retq
17238 ; BITALG_NOVLX-LABEL: ult_3_v2i64:
17239 ; BITALG_NOVLX: # %bb.0:
17240 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17241 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
17242 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
17243 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17244 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [3,3]
17245 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17246 ; BITALG_NOVLX-NEXT: vzeroupper
17247 ; BITALG_NOVLX-NEXT: retq
17249 ; BITALG-LABEL: ult_3_v2i64:
17251 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
17252 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
17253 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17254 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3]
17255 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17256 ; BITALG-NEXT: retq
17257 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
17258 %3 = icmp ult <2 x i64> %2, <i64 3, i64 3>
17259 %4 = sext <2 x i1> %3 to <2 x i64>
17263 define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) {
17264 ; SSE2-LABEL: ugt_3_v2i64:
17266 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17267 ; SSE2-NEXT: psrlw $1, %xmm1
17268 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17269 ; SSE2-NEXT: psubb %xmm1, %xmm0
17270 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17271 ; SSE2-NEXT: movdqa %xmm0, %xmm2
17272 ; SSE2-NEXT: pand %xmm1, %xmm2
17273 ; SSE2-NEXT: psrlw $2, %xmm0
17274 ; SSE2-NEXT: pand %xmm1, %xmm0
17275 ; SSE2-NEXT: paddb %xmm2, %xmm0
17276 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17277 ; SSE2-NEXT: psrlw $4, %xmm1
17278 ; SSE2-NEXT: paddb %xmm0, %xmm1
17279 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17280 ; SSE2-NEXT: pxor %xmm0, %xmm0
17281 ; SSE2-NEXT: psadbw %xmm1, %xmm0
17282 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
17283 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
17286 ; SSE3-LABEL: ugt_3_v2i64:
17288 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17289 ; SSE3-NEXT: psrlw $1, %xmm1
17290 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17291 ; SSE3-NEXT: psubb %xmm1, %xmm0
17292 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17293 ; SSE3-NEXT: movdqa %xmm0, %xmm2
17294 ; SSE3-NEXT: pand %xmm1, %xmm2
17295 ; SSE3-NEXT: psrlw $2, %xmm0
17296 ; SSE3-NEXT: pand %xmm1, %xmm0
17297 ; SSE3-NEXT: paddb %xmm2, %xmm0
17298 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17299 ; SSE3-NEXT: psrlw $4, %xmm1
17300 ; SSE3-NEXT: paddb %xmm0, %xmm1
17301 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17302 ; SSE3-NEXT: pxor %xmm0, %xmm0
17303 ; SSE3-NEXT: psadbw %xmm1, %xmm0
17304 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
17305 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
17308 ; SSSE3-LABEL: ugt_3_v2i64:
17310 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17311 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
17312 ; SSSE3-NEXT: pand %xmm1, %xmm2
17313 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17314 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
17315 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
17316 ; SSSE3-NEXT: psrlw $4, %xmm0
17317 ; SSSE3-NEXT: pand %xmm1, %xmm0
17318 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
17319 ; SSSE3-NEXT: paddb %xmm4, %xmm3
17320 ; SSSE3-NEXT: pxor %xmm0, %xmm0
17321 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
17322 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
17323 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
17326 ; SSE41-LABEL: ugt_3_v2i64:
17328 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17329 ; SSE41-NEXT: movdqa %xmm0, %xmm2
17330 ; SSE41-NEXT: pand %xmm1, %xmm2
17331 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17332 ; SSE41-NEXT: movdqa %xmm3, %xmm4
17333 ; SSE41-NEXT: pshufb %xmm2, %xmm4
17334 ; SSE41-NEXT: psrlw $4, %xmm0
17335 ; SSE41-NEXT: pand %xmm1, %xmm0
17336 ; SSE41-NEXT: pshufb %xmm0, %xmm3
17337 ; SSE41-NEXT: paddb %xmm4, %xmm3
17338 ; SSE41-NEXT: pxor %xmm0, %xmm0
17339 ; SSE41-NEXT: psadbw %xmm3, %xmm0
17340 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
17341 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
17344 ; AVX1-LABEL: ugt_3_v2i64:
17346 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17347 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
17348 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17349 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17350 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
17351 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
17352 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17353 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17354 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
17355 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17356 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17359 ; AVX2-LABEL: ugt_3_v2i64:
17361 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17362 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
17363 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17364 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17365 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
17366 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
17367 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17368 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17369 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
17370 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17371 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17374 ; AVX512VPOPCNTDQ-LABEL: ugt_3_v2i64:
17375 ; AVX512VPOPCNTDQ: # %bb.0:
17376 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17377 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
17378 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17379 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
17380 ; AVX512VPOPCNTDQ-NEXT: retq
17382 ; AVX512VPOPCNTDQVL-LABEL: ugt_3_v2i64:
17383 ; AVX512VPOPCNTDQVL: # %bb.0:
17384 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
17385 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3]
17386 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
17387 ; AVX512VPOPCNTDQVL-NEXT: retq
17389 ; BITALG_NOVLX-LABEL: ugt_3_v2i64:
17390 ; BITALG_NOVLX: # %bb.0:
17391 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17392 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
17393 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
17394 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17395 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17396 ; BITALG_NOVLX-NEXT: vzeroupper
17397 ; BITALG_NOVLX-NEXT: retq
17399 ; BITALG-LABEL: ugt_3_v2i64:
17401 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
17402 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
17403 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17404 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [3,3]
17405 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
17406 ; BITALG-NEXT: retq
17407 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
17408 %3 = icmp ugt <2 x i64> %2, <i64 3, i64 3>
17409 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_4_v2i64 applies llvm.ctpop to a <2 x i64> input, tests each lane's
; population count with an unsigned "less than 4" compare against a splat
; constant, and sign-extends the <2 x i1> result to a <2 x i64> mask (%4).
; NOTE(review): the ret line and closing brace are not visible in this excerpt;
; %4 is presumably the returned value - confirm against the full file.
; The per-target assertion lines below are autogenerated (the first line of the
; file names utils/update_llc_test_checks.py); regenerate them with that script
; instead of editing them by hand.
; In the expected code the splat of 4 is the left-hand side of the signed
; greater-than compare (4 > popcount). The targets checked with plain pcmpgtd
; first duplicate the low dword of each 64-bit lane via pshufd [0,0,2,2].
17413 define <2 x i64> @ult_4_v2i64(<2 x i64> %0) {
17414 ; SSE2-LABEL: ult_4_v2i64:
17416 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17417 ; SSE2-NEXT: psrlw $1, %xmm1
17418 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17419 ; SSE2-NEXT: psubb %xmm1, %xmm0
17420 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17421 ; SSE2-NEXT: movdqa %xmm0, %xmm2
17422 ; SSE2-NEXT: pand %xmm1, %xmm2
17423 ; SSE2-NEXT: psrlw $2, %xmm0
17424 ; SSE2-NEXT: pand %xmm1, %xmm0
17425 ; SSE2-NEXT: paddb %xmm2, %xmm0
17426 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17427 ; SSE2-NEXT: psrlw $4, %xmm1
17428 ; SSE2-NEXT: paddb %xmm0, %xmm1
17429 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17430 ; SSE2-NEXT: pxor %xmm0, %xmm0
17431 ; SSE2-NEXT: psadbw %xmm1, %xmm0
17432 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17433 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4]
17434 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
17437 ; SSE3-LABEL: ult_4_v2i64:
17439 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17440 ; SSE3-NEXT: psrlw $1, %xmm1
17441 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17442 ; SSE3-NEXT: psubb %xmm1, %xmm0
17443 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17444 ; SSE3-NEXT: movdqa %xmm0, %xmm2
17445 ; SSE3-NEXT: pand %xmm1, %xmm2
17446 ; SSE3-NEXT: psrlw $2, %xmm0
17447 ; SSE3-NEXT: pand %xmm1, %xmm0
17448 ; SSE3-NEXT: paddb %xmm2, %xmm0
17449 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17450 ; SSE3-NEXT: psrlw $4, %xmm1
17451 ; SSE3-NEXT: paddb %xmm0, %xmm1
17452 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17453 ; SSE3-NEXT: pxor %xmm0, %xmm0
17454 ; SSE3-NEXT: psadbw %xmm1, %xmm0
17455 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17456 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4]
17457 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
17460 ; SSSE3-LABEL: ult_4_v2i64:
17462 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17463 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
17464 ; SSSE3-NEXT: pand %xmm1, %xmm2
17465 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17466 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
17467 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
17468 ; SSSE3-NEXT: psrlw $4, %xmm0
17469 ; SSSE3-NEXT: pand %xmm1, %xmm0
17470 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
17471 ; SSSE3-NEXT: paddb %xmm4, %xmm3
17472 ; SSSE3-NEXT: pxor %xmm0, %xmm0
17473 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
17474 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17475 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4]
17476 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
17479 ; SSE41-LABEL: ult_4_v2i64:
17481 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17482 ; SSE41-NEXT: movdqa %xmm0, %xmm2
17483 ; SSE41-NEXT: pand %xmm1, %xmm2
17484 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17485 ; SSE41-NEXT: movdqa %xmm3, %xmm4
17486 ; SSE41-NEXT: pshufb %xmm2, %xmm4
17487 ; SSE41-NEXT: psrlw $4, %xmm0
17488 ; SSE41-NEXT: pand %xmm1, %xmm0
17489 ; SSE41-NEXT: pshufb %xmm0, %xmm3
17490 ; SSE41-NEXT: paddb %xmm4, %xmm3
17491 ; SSE41-NEXT: pxor %xmm0, %xmm0
17492 ; SSE41-NEXT: psadbw %xmm3, %xmm0
17493 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17494 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [4,4,4,4]
17495 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
17498 ; AVX1-LABEL: ult_4_v2i64:
17500 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17501 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
17502 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17503 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17504 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
17505 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
17506 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17507 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17508 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
17509 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17510 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [4,4]
17511 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17514 ; AVX2-LABEL: ult_4_v2i64:
17516 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17517 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
17518 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17519 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17520 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
17521 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
17522 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17523 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17524 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
17525 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17526 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [4,4]
17527 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17530 ; AVX512VPOPCNTDQ-LABEL: ult_4_v2i64:
17531 ; AVX512VPOPCNTDQ: # %bb.0:
17532 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17533 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
17534 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [4,4]
17535 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17536 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
17537 ; AVX512VPOPCNTDQ-NEXT: retq
17539 ; AVX512VPOPCNTDQVL-LABEL: ult_4_v2i64:
17540 ; AVX512VPOPCNTDQVL: # %bb.0:
17541 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
17542 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4]
17543 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17544 ; AVX512VPOPCNTDQVL-NEXT: retq
17546 ; BITALG_NOVLX-LABEL: ult_4_v2i64:
17547 ; BITALG_NOVLX: # %bb.0:
17548 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17549 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
17550 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
17551 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17552 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [4,4]
17553 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17554 ; BITALG_NOVLX-NEXT: vzeroupper
17555 ; BITALG_NOVLX-NEXT: retq
17557 ; BITALG-LABEL: ult_4_v2i64:
17559 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
17560 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
17561 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17562 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4]
17563 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17564 ; BITALG-NEXT: retq
; IR under test: per-lane popcount, unsigned compare with splat 4, sext of the mask.
17565 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
17566 %3 = icmp ult <2 x i64> %2, <i64 4, i64 4>
17567 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_4_v2i64 applies llvm.ctpop to a <2 x i64> input, tests each lane's
; population count with an unsigned "greater than 4" compare against a splat
; constant, and sign-extends the <2 x i1> result to a <2 x i64> mask (%4).
; NOTE(review): the ret line and closing brace are not visible in this excerpt;
; %4 is presumably the returned value - confirm against the full file.
; The per-target assertion lines below are autogenerated (the first line of the
; file names utils/update_llc_test_checks.py); regenerate them with that script
; instead of editing them by hand.
; In the expected code the popcount is the left-hand side of the signed
; greater-than compare (popcount > constant); several targets take the
; constant straight from a constant-pool memory operand.
17571 define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) {
17572 ; SSE2-LABEL: ugt_4_v2i64:
17574 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17575 ; SSE2-NEXT: psrlw $1, %xmm1
17576 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17577 ; SSE2-NEXT: psubb %xmm1, %xmm0
17578 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17579 ; SSE2-NEXT: movdqa %xmm0, %xmm2
17580 ; SSE2-NEXT: pand %xmm1, %xmm2
17581 ; SSE2-NEXT: psrlw $2, %xmm0
17582 ; SSE2-NEXT: pand %xmm1, %xmm0
17583 ; SSE2-NEXT: paddb %xmm2, %xmm0
17584 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17585 ; SSE2-NEXT: psrlw $4, %xmm1
17586 ; SSE2-NEXT: paddb %xmm0, %xmm1
17587 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17588 ; SSE2-NEXT: pxor %xmm0, %xmm0
17589 ; SSE2-NEXT: psadbw %xmm1, %xmm0
17590 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
17591 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
17594 ; SSE3-LABEL: ugt_4_v2i64:
17596 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17597 ; SSE3-NEXT: psrlw $1, %xmm1
17598 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17599 ; SSE3-NEXT: psubb %xmm1, %xmm0
17600 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17601 ; SSE3-NEXT: movdqa %xmm0, %xmm2
17602 ; SSE3-NEXT: pand %xmm1, %xmm2
17603 ; SSE3-NEXT: psrlw $2, %xmm0
17604 ; SSE3-NEXT: pand %xmm1, %xmm0
17605 ; SSE3-NEXT: paddb %xmm2, %xmm0
17606 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17607 ; SSE3-NEXT: psrlw $4, %xmm1
17608 ; SSE3-NEXT: paddb %xmm0, %xmm1
17609 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17610 ; SSE3-NEXT: pxor %xmm0, %xmm0
17611 ; SSE3-NEXT: psadbw %xmm1, %xmm0
17612 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
17613 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
17616 ; SSSE3-LABEL: ugt_4_v2i64:
17618 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17619 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
17620 ; SSSE3-NEXT: pand %xmm1, %xmm2
17621 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17622 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
17623 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
17624 ; SSSE3-NEXT: psrlw $4, %xmm0
17625 ; SSSE3-NEXT: pand %xmm1, %xmm0
17626 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
17627 ; SSSE3-NEXT: paddb %xmm4, %xmm3
17628 ; SSSE3-NEXT: pxor %xmm0, %xmm0
17629 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
17630 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
17631 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
17634 ; SSE41-LABEL: ugt_4_v2i64:
17636 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17637 ; SSE41-NEXT: movdqa %xmm0, %xmm2
17638 ; SSE41-NEXT: pand %xmm1, %xmm2
17639 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17640 ; SSE41-NEXT: movdqa %xmm3, %xmm4
17641 ; SSE41-NEXT: pshufb %xmm2, %xmm4
17642 ; SSE41-NEXT: psrlw $4, %xmm0
17643 ; SSE41-NEXT: pand %xmm1, %xmm0
17644 ; SSE41-NEXT: pshufb %xmm0, %xmm3
17645 ; SSE41-NEXT: paddb %xmm4, %xmm3
17646 ; SSE41-NEXT: pxor %xmm0, %xmm0
17647 ; SSE41-NEXT: psadbw %xmm3, %xmm0
17648 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
17649 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
17652 ; AVX1-LABEL: ugt_4_v2i64:
17654 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17655 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
17656 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17657 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17658 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
17659 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
17660 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17661 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17662 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
17663 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17664 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17667 ; AVX2-LABEL: ugt_4_v2i64:
17669 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17670 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
17671 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17672 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17673 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
17674 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
17675 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17676 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17677 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
17678 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17679 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17682 ; AVX512VPOPCNTDQ-LABEL: ugt_4_v2i64:
17683 ; AVX512VPOPCNTDQ: # %bb.0:
17684 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17685 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
17686 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17687 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
17688 ; AVX512VPOPCNTDQ-NEXT: retq
17690 ; AVX512VPOPCNTDQVL-LABEL: ugt_4_v2i64:
17691 ; AVX512VPOPCNTDQVL: # %bb.0:
17692 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
17693 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4]
17694 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
17695 ; AVX512VPOPCNTDQVL-NEXT: retq
17697 ; BITALG_NOVLX-LABEL: ugt_4_v2i64:
17698 ; BITALG_NOVLX: # %bb.0:
17699 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17700 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
17701 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
17702 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17703 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17704 ; BITALG_NOVLX-NEXT: vzeroupper
17705 ; BITALG_NOVLX-NEXT: retq
17707 ; BITALG-LABEL: ugt_4_v2i64:
17709 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
17710 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
17711 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17712 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4,4]
17713 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
17714 ; BITALG-NEXT: retq
; IR under test: per-lane popcount, unsigned compare with splat 4, sext of the mask.
17715 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
17716 %3 = icmp ugt <2 x i64> %2, <i64 4, i64 4>
17717 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_5_v2i64 applies llvm.ctpop to a <2 x i64> input, tests each lane's
; population count with an unsigned "less than 5" compare against a splat
; constant, and sign-extends the <2 x i1> result to a <2 x i64> mask (%4).
; NOTE(review): the ret line and closing brace are not visible in this excerpt;
; %4 is presumably the returned value - confirm against the full file.
; The per-target assertion lines below are autogenerated (the first line of the
; file names utils/update_llc_test_checks.py); regenerate them with that script
; instead of editing them by hand.
; In the expected code the splat of 5 is the left-hand side of the signed
; greater-than compare (5 > popcount). The targets checked with plain pcmpgtd
; first duplicate the low dword of each 64-bit lane via pshufd [0,0,2,2].
17721 define <2 x i64> @ult_5_v2i64(<2 x i64> %0) {
17722 ; SSE2-LABEL: ult_5_v2i64:
17724 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17725 ; SSE2-NEXT: psrlw $1, %xmm1
17726 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17727 ; SSE2-NEXT: psubb %xmm1, %xmm0
17728 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17729 ; SSE2-NEXT: movdqa %xmm0, %xmm2
17730 ; SSE2-NEXT: pand %xmm1, %xmm2
17731 ; SSE2-NEXT: psrlw $2, %xmm0
17732 ; SSE2-NEXT: pand %xmm1, %xmm0
17733 ; SSE2-NEXT: paddb %xmm2, %xmm0
17734 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17735 ; SSE2-NEXT: psrlw $4, %xmm1
17736 ; SSE2-NEXT: paddb %xmm0, %xmm1
17737 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17738 ; SSE2-NEXT: pxor %xmm0, %xmm0
17739 ; SSE2-NEXT: psadbw %xmm1, %xmm0
17740 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17741 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5]
17742 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
17745 ; SSE3-LABEL: ult_5_v2i64:
17747 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17748 ; SSE3-NEXT: psrlw $1, %xmm1
17749 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17750 ; SSE3-NEXT: psubb %xmm1, %xmm0
17751 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17752 ; SSE3-NEXT: movdqa %xmm0, %xmm2
17753 ; SSE3-NEXT: pand %xmm1, %xmm2
17754 ; SSE3-NEXT: psrlw $2, %xmm0
17755 ; SSE3-NEXT: pand %xmm1, %xmm0
17756 ; SSE3-NEXT: paddb %xmm2, %xmm0
17757 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17758 ; SSE3-NEXT: psrlw $4, %xmm1
17759 ; SSE3-NEXT: paddb %xmm0, %xmm1
17760 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17761 ; SSE3-NEXT: pxor %xmm0, %xmm0
17762 ; SSE3-NEXT: psadbw %xmm1, %xmm0
17763 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17764 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5]
17765 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
17768 ; SSSE3-LABEL: ult_5_v2i64:
17770 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17771 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
17772 ; SSSE3-NEXT: pand %xmm1, %xmm2
17773 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17774 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
17775 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
17776 ; SSSE3-NEXT: psrlw $4, %xmm0
17777 ; SSSE3-NEXT: pand %xmm1, %xmm0
17778 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
17779 ; SSSE3-NEXT: paddb %xmm4, %xmm3
17780 ; SSSE3-NEXT: pxor %xmm0, %xmm0
17781 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
17782 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17783 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5]
17784 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
17787 ; SSE41-LABEL: ult_5_v2i64:
17789 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17790 ; SSE41-NEXT: movdqa %xmm0, %xmm2
17791 ; SSE41-NEXT: pand %xmm1, %xmm2
17792 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17793 ; SSE41-NEXT: movdqa %xmm3, %xmm4
17794 ; SSE41-NEXT: pshufb %xmm2, %xmm4
17795 ; SSE41-NEXT: psrlw $4, %xmm0
17796 ; SSE41-NEXT: pand %xmm1, %xmm0
17797 ; SSE41-NEXT: pshufb %xmm0, %xmm3
17798 ; SSE41-NEXT: paddb %xmm4, %xmm3
17799 ; SSE41-NEXT: pxor %xmm0, %xmm0
17800 ; SSE41-NEXT: psadbw %xmm3, %xmm0
17801 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
17802 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [5,5,5,5]
17803 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
17806 ; AVX1-LABEL: ult_5_v2i64:
17808 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17809 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
17810 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17811 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17812 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
17813 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
17814 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17815 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17816 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
17817 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17818 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [5,5]
17819 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17822 ; AVX2-LABEL: ult_5_v2i64:
17824 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17825 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
17826 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17827 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17828 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
17829 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
17830 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17831 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17832 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
17833 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17834 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [5,5]
17835 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17838 ; AVX512VPOPCNTDQ-LABEL: ult_5_v2i64:
17839 ; AVX512VPOPCNTDQ: # %bb.0:
17840 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17841 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
17842 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [5,5]
17843 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17844 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
17845 ; AVX512VPOPCNTDQ-NEXT: retq
17847 ; AVX512VPOPCNTDQVL-LABEL: ult_5_v2i64:
17848 ; AVX512VPOPCNTDQVL: # %bb.0:
17849 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
17850 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5]
17851 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17852 ; AVX512VPOPCNTDQVL-NEXT: retq
17854 ; BITALG_NOVLX-LABEL: ult_5_v2i64:
17855 ; BITALG_NOVLX: # %bb.0:
17856 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17857 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
17858 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
17859 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17860 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [5,5]
17861 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17862 ; BITALG_NOVLX-NEXT: vzeroupper
17863 ; BITALG_NOVLX-NEXT: retq
17865 ; BITALG-LABEL: ult_5_v2i64:
17867 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
17868 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
17869 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17870 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5]
17871 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
17872 ; BITALG-NEXT: retq
; IR under test: per-lane popcount, unsigned compare with splat 5, sext of the mask.
17873 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
17874 %3 = icmp ult <2 x i64> %2, <i64 5, i64 5>
17875 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_5_v2i64 applies llvm.ctpop to a <2 x i64> input, tests each lane's
; population count with an unsigned "greater than 5" compare against a splat
; constant, and sign-extends the <2 x i1> result to a <2 x i64> mask (%4).
; NOTE(review): the ret line and closing brace are not visible in this excerpt;
; %4 is presumably the returned value - confirm against the full file.
; The per-target assertion lines below are autogenerated (the first line of the
; file names utils/update_llc_test_checks.py); regenerate them with that script
; instead of editing them by hand.
; In the expected code the popcount is the left-hand side of the signed
; greater-than compare (popcount > constant); several targets take the
; constant straight from a constant-pool memory operand.
17879 define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) {
17880 ; SSE2-LABEL: ugt_5_v2i64:
17882 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17883 ; SSE2-NEXT: psrlw $1, %xmm1
17884 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17885 ; SSE2-NEXT: psubb %xmm1, %xmm0
17886 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17887 ; SSE2-NEXT: movdqa %xmm0, %xmm2
17888 ; SSE2-NEXT: pand %xmm1, %xmm2
17889 ; SSE2-NEXT: psrlw $2, %xmm0
17890 ; SSE2-NEXT: pand %xmm1, %xmm0
17891 ; SSE2-NEXT: paddb %xmm2, %xmm0
17892 ; SSE2-NEXT: movdqa %xmm0, %xmm1
17893 ; SSE2-NEXT: psrlw $4, %xmm1
17894 ; SSE2-NEXT: paddb %xmm0, %xmm1
17895 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17896 ; SSE2-NEXT: pxor %xmm0, %xmm0
17897 ; SSE2-NEXT: psadbw %xmm1, %xmm0
17898 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
17899 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
17902 ; SSE3-LABEL: ugt_5_v2i64:
17904 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17905 ; SSE3-NEXT: psrlw $1, %xmm1
17906 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17907 ; SSE3-NEXT: psubb %xmm1, %xmm0
17908 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
17909 ; SSE3-NEXT: movdqa %xmm0, %xmm2
17910 ; SSE3-NEXT: pand %xmm1, %xmm2
17911 ; SSE3-NEXT: psrlw $2, %xmm0
17912 ; SSE3-NEXT: pand %xmm1, %xmm0
17913 ; SSE3-NEXT: paddb %xmm2, %xmm0
17914 ; SSE3-NEXT: movdqa %xmm0, %xmm1
17915 ; SSE3-NEXT: psrlw $4, %xmm1
17916 ; SSE3-NEXT: paddb %xmm0, %xmm1
17917 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
17918 ; SSE3-NEXT: pxor %xmm0, %xmm0
17919 ; SSE3-NEXT: psadbw %xmm1, %xmm0
17920 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
17921 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
17924 ; SSSE3-LABEL: ugt_5_v2i64:
17926 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17927 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
17928 ; SSSE3-NEXT: pand %xmm1, %xmm2
17929 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17930 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
17931 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
17932 ; SSSE3-NEXT: psrlw $4, %xmm0
17933 ; SSSE3-NEXT: pand %xmm1, %xmm0
17934 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
17935 ; SSSE3-NEXT: paddb %xmm4, %xmm3
17936 ; SSSE3-NEXT: pxor %xmm0, %xmm0
17937 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
17938 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
17939 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
17942 ; SSE41-LABEL: ugt_5_v2i64:
17944 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17945 ; SSE41-NEXT: movdqa %xmm0, %xmm2
17946 ; SSE41-NEXT: pand %xmm1, %xmm2
17947 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17948 ; SSE41-NEXT: movdqa %xmm3, %xmm4
17949 ; SSE41-NEXT: pshufb %xmm2, %xmm4
17950 ; SSE41-NEXT: psrlw $4, %xmm0
17951 ; SSE41-NEXT: pand %xmm1, %xmm0
17952 ; SSE41-NEXT: pshufb %xmm0, %xmm3
17953 ; SSE41-NEXT: paddb %xmm4, %xmm3
17954 ; SSE41-NEXT: pxor %xmm0, %xmm0
17955 ; SSE41-NEXT: psadbw %xmm3, %xmm0
17956 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
17957 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
17960 ; AVX1-LABEL: ugt_5_v2i64:
17962 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17963 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
17964 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17965 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17966 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
17967 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
17968 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17969 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17970 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
17971 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17972 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17975 ; AVX2-LABEL: ugt_5_v2i64:
17977 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
17978 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
17979 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
17980 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
17981 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
17982 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
17983 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
17984 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
17985 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
17986 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
17987 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17990 ; AVX512VPOPCNTDQ-LABEL: ugt_5_v2i64:
17991 ; AVX512VPOPCNTDQ: # %bb.0:
17992 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
17993 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
17994 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
17995 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
17996 ; AVX512VPOPCNTDQ-NEXT: retq
17998 ; AVX512VPOPCNTDQVL-LABEL: ugt_5_v2i64:
17999 ; AVX512VPOPCNTDQVL: # %bb.0:
18000 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
18001 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5]
18002 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
18003 ; AVX512VPOPCNTDQVL-NEXT: retq
18005 ; BITALG_NOVLX-LABEL: ugt_5_v2i64:
18006 ; BITALG_NOVLX: # %bb.0:
18007 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18008 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
18009 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
18010 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18011 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18012 ; BITALG_NOVLX-NEXT: vzeroupper
18013 ; BITALG_NOVLX-NEXT: retq
18015 ; BITALG-LABEL: ugt_5_v2i64:
18017 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
18018 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
18019 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18020 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [5,5]
18021 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
18022 ; BITALG-NEXT: retq
; IR under test: per-lane popcount, unsigned compare with splat 5, sext of the mask.
18023 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
18024 %3 = icmp ugt <2 x i64> %2, <i64 5, i64 5>
18025 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_6_v2i64 applies llvm.ctpop to a <2 x i64> input, tests each lane's
; population count with an unsigned "less than 6" compare against a splat
; constant, and sign-extends the <2 x i1> result to a <2 x i64> mask (%4).
; NOTE(review): the ret line and closing brace are not visible in this excerpt;
; %4 is presumably the returned value - confirm against the full file.
; The per-target assertion lines below are autogenerated (the first line of the
; file names utils/update_llc_test_checks.py); regenerate them with that script
; instead of editing them by hand.
; In the expected code the splat of 6 is the left-hand side of the signed
; greater-than compare (6 > popcount). The targets checked with plain pcmpgtd
; first duplicate the low dword of each 64-bit lane via pshufd [0,0,2,2].
18029 define <2 x i64> @ult_6_v2i64(<2 x i64> %0) {
18030 ; SSE2-LABEL: ult_6_v2i64:
18032 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18033 ; SSE2-NEXT: psrlw $1, %xmm1
18034 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18035 ; SSE2-NEXT: psubb %xmm1, %xmm0
18036 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18037 ; SSE2-NEXT: movdqa %xmm0, %xmm2
18038 ; SSE2-NEXT: pand %xmm1, %xmm2
18039 ; SSE2-NEXT: psrlw $2, %xmm0
18040 ; SSE2-NEXT: pand %xmm1, %xmm0
18041 ; SSE2-NEXT: paddb %xmm2, %xmm0
18042 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18043 ; SSE2-NEXT: psrlw $4, %xmm1
18044 ; SSE2-NEXT: paddb %xmm0, %xmm1
18045 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18046 ; SSE2-NEXT: pxor %xmm0, %xmm0
18047 ; SSE2-NEXT: psadbw %xmm1, %xmm0
18048 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18049 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
18050 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
18053 ; SSE3-LABEL: ult_6_v2i64:
18055 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18056 ; SSE3-NEXT: psrlw $1, %xmm1
18057 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18058 ; SSE3-NEXT: psubb %xmm1, %xmm0
18059 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18060 ; SSE3-NEXT: movdqa %xmm0, %xmm2
18061 ; SSE3-NEXT: pand %xmm1, %xmm2
18062 ; SSE3-NEXT: psrlw $2, %xmm0
18063 ; SSE3-NEXT: pand %xmm1, %xmm0
18064 ; SSE3-NEXT: paddb %xmm2, %xmm0
18065 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18066 ; SSE3-NEXT: psrlw $4, %xmm1
18067 ; SSE3-NEXT: paddb %xmm0, %xmm1
18068 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18069 ; SSE3-NEXT: pxor %xmm0, %xmm0
18070 ; SSE3-NEXT: psadbw %xmm1, %xmm0
18071 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18072 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
18073 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
18076 ; SSSE3-LABEL: ult_6_v2i64:
18078 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18079 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
18080 ; SSSE3-NEXT: pand %xmm1, %xmm2
18081 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18082 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
18083 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
18084 ; SSSE3-NEXT: psrlw $4, %xmm0
18085 ; SSSE3-NEXT: pand %xmm1, %xmm0
18086 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
18087 ; SSSE3-NEXT: paddb %xmm4, %xmm3
18088 ; SSSE3-NEXT: pxor %xmm0, %xmm0
18089 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
18090 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18091 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
18092 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
18095 ; SSE41-LABEL: ult_6_v2i64:
18097 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18098 ; SSE41-NEXT: movdqa %xmm0, %xmm2
18099 ; SSE41-NEXT: pand %xmm1, %xmm2
18100 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18101 ; SSE41-NEXT: movdqa %xmm3, %xmm4
18102 ; SSE41-NEXT: pshufb %xmm2, %xmm4
18103 ; SSE41-NEXT: psrlw $4, %xmm0
18104 ; SSE41-NEXT: pand %xmm1, %xmm0
18105 ; SSE41-NEXT: pshufb %xmm0, %xmm3
18106 ; SSE41-NEXT: paddb %xmm4, %xmm3
18107 ; SSE41-NEXT: pxor %xmm0, %xmm0
18108 ; SSE41-NEXT: psadbw %xmm3, %xmm0
18109 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18110 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [6,6,6,6]
18111 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
18114 ; AVX1-LABEL: ult_6_v2i64:
18116 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18117 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
18118 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18119 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18120 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
18121 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
18122 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18123 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18124 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
18125 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18126 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [6,6]
18127 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18130 ; AVX2-LABEL: ult_6_v2i64:
18132 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18133 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
18134 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18135 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18136 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
18137 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
18138 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18139 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18140 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
18141 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18142 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [6,6]
18143 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18146 ; AVX512VPOPCNTDQ-LABEL: ult_6_v2i64:
18147 ; AVX512VPOPCNTDQ: # %bb.0:
18148 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18149 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
18150 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [6,6]
18151 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18152 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
18153 ; AVX512VPOPCNTDQ-NEXT: retq
18155 ; AVX512VPOPCNTDQVL-LABEL: ult_6_v2i64:
18156 ; AVX512VPOPCNTDQVL: # %bb.0:
18157 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
18158 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6]
18159 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18160 ; AVX512VPOPCNTDQVL-NEXT: retq
18162 ; BITALG_NOVLX-LABEL: ult_6_v2i64:
18163 ; BITALG_NOVLX: # %bb.0:
18164 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18165 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
18166 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
18167 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18168 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [6,6]
18169 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18170 ; BITALG_NOVLX-NEXT: vzeroupper
18171 ; BITALG_NOVLX-NEXT: retq
18173 ; BITALG-LABEL: ult_6_v2i64:
18175 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
18176 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
18177 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18178 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6]
18179 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18180 ; BITALG-NEXT: retq
; IR under test: per-lane popcount, unsigned compare with splat 6, sext of the mask.
18181 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
18182 %3 = icmp ult <2 x i64> %2, <i64 6, i64 6>
18183 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_6_v2i64 - per-lane unsigned compare ctpop(%0) > 6 on <2 x i64>, with
; the <2 x i1> compare result sign-extended to <2 x i64>.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
18187 define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) {
18188 ; SSE2-LABEL: ugt_6_v2i64:
18190 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18191 ; SSE2-NEXT: psrlw $1, %xmm1
18192 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18193 ; SSE2-NEXT: psubb %xmm1, %xmm0
18194 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18195 ; SSE2-NEXT: movdqa %xmm0, %xmm2
18196 ; SSE2-NEXT: pand %xmm1, %xmm2
18197 ; SSE2-NEXT: psrlw $2, %xmm0
18198 ; SSE2-NEXT: pand %xmm1, %xmm0
18199 ; SSE2-NEXT: paddb %xmm2, %xmm0
18200 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18201 ; SSE2-NEXT: psrlw $4, %xmm1
18202 ; SSE2-NEXT: paddb %xmm0, %xmm1
18203 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18204 ; SSE2-NEXT: pxor %xmm0, %xmm0
18205 ; SSE2-NEXT: psadbw %xmm1, %xmm0
18206 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
18207 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
18210 ; SSE3-LABEL: ugt_6_v2i64:
18212 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18213 ; SSE3-NEXT: psrlw $1, %xmm1
18214 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18215 ; SSE3-NEXT: psubb %xmm1, %xmm0
18216 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18217 ; SSE3-NEXT: movdqa %xmm0, %xmm2
18218 ; SSE3-NEXT: pand %xmm1, %xmm2
18219 ; SSE3-NEXT: psrlw $2, %xmm0
18220 ; SSE3-NEXT: pand %xmm1, %xmm0
18221 ; SSE3-NEXT: paddb %xmm2, %xmm0
18222 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18223 ; SSE3-NEXT: psrlw $4, %xmm1
18224 ; SSE3-NEXT: paddb %xmm0, %xmm1
18225 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18226 ; SSE3-NEXT: pxor %xmm0, %xmm0
18227 ; SSE3-NEXT: psadbw %xmm1, %xmm0
18228 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
18229 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
18232 ; SSSE3-LABEL: ugt_6_v2i64:
18234 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18235 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
18236 ; SSSE3-NEXT: pand %xmm1, %xmm2
18237 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18238 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
18239 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
18240 ; SSSE3-NEXT: psrlw $4, %xmm0
18241 ; SSSE3-NEXT: pand %xmm1, %xmm0
18242 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
18243 ; SSSE3-NEXT: paddb %xmm4, %xmm3
18244 ; SSSE3-NEXT: pxor %xmm0, %xmm0
18245 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
18246 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
18247 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
18250 ; SSE41-LABEL: ugt_6_v2i64:
18252 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18253 ; SSE41-NEXT: movdqa %xmm0, %xmm2
18254 ; SSE41-NEXT: pand %xmm1, %xmm2
18255 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18256 ; SSE41-NEXT: movdqa %xmm3, %xmm4
18257 ; SSE41-NEXT: pshufb %xmm2, %xmm4
18258 ; SSE41-NEXT: psrlw $4, %xmm0
18259 ; SSE41-NEXT: pand %xmm1, %xmm0
18260 ; SSE41-NEXT: pshufb %xmm0, %xmm3
18261 ; SSE41-NEXT: paddb %xmm4, %xmm3
18262 ; SSE41-NEXT: pxor %xmm0, %xmm0
18263 ; SSE41-NEXT: psadbw %xmm3, %xmm0
18264 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
18265 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
18268 ; AVX1-LABEL: ugt_6_v2i64:
18270 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18271 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
18272 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18273 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18274 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
18275 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
18276 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18277 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18278 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
18279 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18280 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18283 ; AVX2-LABEL: ugt_6_v2i64:
18285 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18286 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
18287 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18288 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18289 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
18290 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
18291 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18292 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18293 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
18294 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18295 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18298 ; AVX512VPOPCNTDQ-LABEL: ugt_6_v2i64:
18299 ; AVX512VPOPCNTDQ: # %bb.0:
18300 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18301 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
18302 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18303 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
18304 ; AVX512VPOPCNTDQ-NEXT: retq
18306 ; AVX512VPOPCNTDQVL-LABEL: ugt_6_v2i64:
18307 ; AVX512VPOPCNTDQVL: # %bb.0:
18308 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
18309 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6]
18310 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
18311 ; AVX512VPOPCNTDQVL-NEXT: retq
18313 ; BITALG_NOVLX-LABEL: ugt_6_v2i64:
18314 ; BITALG_NOVLX: # %bb.0:
18315 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18316 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
18317 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
18318 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18319 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18320 ; BITALG_NOVLX-NEXT: vzeroupper
18321 ; BITALG_NOVLX-NEXT: retq
18323 ; BITALG-LABEL: ugt_6_v2i64:
18325 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
18326 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
18327 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18328 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [6,6]
18329 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
18330 ; BITALG-NEXT: retq
; IR under test - population count, then icmp ugt against splat 6, then sext.
18331 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
18332 %3 = icmp ugt <2 x i64> %2, <i64 6, i64 6>
18333 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_7_v2i64 - per-lane unsigned compare ctpop(%0) < 7 on <2 x i64>, with
; the <2 x i1> compare result sign-extended to <2 x i64>.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
18337 define <2 x i64> @ult_7_v2i64(<2 x i64> %0) {
18338 ; SSE2-LABEL: ult_7_v2i64:
18340 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18341 ; SSE2-NEXT: psrlw $1, %xmm1
18342 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18343 ; SSE2-NEXT: psubb %xmm1, %xmm0
18344 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18345 ; SSE2-NEXT: movdqa %xmm0, %xmm2
18346 ; SSE2-NEXT: pand %xmm1, %xmm2
18347 ; SSE2-NEXT: psrlw $2, %xmm0
18348 ; SSE2-NEXT: pand %xmm1, %xmm0
18349 ; SSE2-NEXT: paddb %xmm2, %xmm0
18350 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18351 ; SSE2-NEXT: psrlw $4, %xmm1
18352 ; SSE2-NEXT: paddb %xmm0, %xmm1
18353 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18354 ; SSE2-NEXT: pxor %xmm0, %xmm0
18355 ; SSE2-NEXT: psadbw %xmm1, %xmm0
18356 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18357 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7]
18358 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
18361 ; SSE3-LABEL: ult_7_v2i64:
18363 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18364 ; SSE3-NEXT: psrlw $1, %xmm1
18365 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18366 ; SSE3-NEXT: psubb %xmm1, %xmm0
18367 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18368 ; SSE3-NEXT: movdqa %xmm0, %xmm2
18369 ; SSE3-NEXT: pand %xmm1, %xmm2
18370 ; SSE3-NEXT: psrlw $2, %xmm0
18371 ; SSE3-NEXT: pand %xmm1, %xmm0
18372 ; SSE3-NEXT: paddb %xmm2, %xmm0
18373 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18374 ; SSE3-NEXT: psrlw $4, %xmm1
18375 ; SSE3-NEXT: paddb %xmm0, %xmm1
18376 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18377 ; SSE3-NEXT: pxor %xmm0, %xmm0
18378 ; SSE3-NEXT: psadbw %xmm1, %xmm0
18379 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18380 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7]
18381 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
18384 ; SSSE3-LABEL: ult_7_v2i64:
18386 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18387 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
18388 ; SSSE3-NEXT: pand %xmm1, %xmm2
18389 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18390 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
18391 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
18392 ; SSSE3-NEXT: psrlw $4, %xmm0
18393 ; SSSE3-NEXT: pand %xmm1, %xmm0
18394 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
18395 ; SSSE3-NEXT: paddb %xmm4, %xmm3
18396 ; SSSE3-NEXT: pxor %xmm0, %xmm0
18397 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
18398 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18399 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7]
18400 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
18403 ; SSE41-LABEL: ult_7_v2i64:
18405 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18406 ; SSE41-NEXT: movdqa %xmm0, %xmm2
18407 ; SSE41-NEXT: pand %xmm1, %xmm2
18408 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18409 ; SSE41-NEXT: movdqa %xmm3, %xmm4
18410 ; SSE41-NEXT: pshufb %xmm2, %xmm4
18411 ; SSE41-NEXT: psrlw $4, %xmm0
18412 ; SSE41-NEXT: pand %xmm1, %xmm0
18413 ; SSE41-NEXT: pshufb %xmm0, %xmm3
18414 ; SSE41-NEXT: paddb %xmm4, %xmm3
18415 ; SSE41-NEXT: pxor %xmm0, %xmm0
18416 ; SSE41-NEXT: psadbw %xmm3, %xmm0
18417 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18418 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [7,7,7,7]
18419 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
18422 ; AVX1-LABEL: ult_7_v2i64:
18424 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18425 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
18426 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18427 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18428 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
18429 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
18430 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18431 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18432 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
18433 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18434 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [7,7]
18435 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18438 ; AVX2-LABEL: ult_7_v2i64:
18440 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18441 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
18442 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18443 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18444 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
18445 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
18446 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18447 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18448 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
18449 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18450 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [7,7]
18451 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18454 ; AVX512VPOPCNTDQ-LABEL: ult_7_v2i64:
18455 ; AVX512VPOPCNTDQ: # %bb.0:
18456 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18457 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
18458 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [7,7]
18459 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18460 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
18461 ; AVX512VPOPCNTDQ-NEXT: retq
18463 ; AVX512VPOPCNTDQVL-LABEL: ult_7_v2i64:
18464 ; AVX512VPOPCNTDQVL: # %bb.0:
18465 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
18466 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7]
18467 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18468 ; AVX512VPOPCNTDQVL-NEXT: retq
18470 ; BITALG_NOVLX-LABEL: ult_7_v2i64:
18471 ; BITALG_NOVLX: # %bb.0:
18472 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18473 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
18474 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
18475 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18476 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [7,7]
18477 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18478 ; BITALG_NOVLX-NEXT: vzeroupper
18479 ; BITALG_NOVLX-NEXT: retq
18481 ; BITALG-LABEL: ult_7_v2i64:
18483 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
18484 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
18485 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18486 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7]
18487 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18488 ; BITALG-NEXT: retq
; IR under test - population count, then icmp ult against splat 7, then sext.
18489 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
18490 %3 = icmp ult <2 x i64> %2, <i64 7, i64 7>
18491 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_7_v2i64 - per-lane unsigned compare ctpop(%0) > 7 on <2 x i64>, with
; the <2 x i1> compare result sign-extended to <2 x i64>.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
18495 define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) {
18496 ; SSE2-LABEL: ugt_7_v2i64:
18498 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18499 ; SSE2-NEXT: psrlw $1, %xmm1
18500 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18501 ; SSE2-NEXT: psubb %xmm1, %xmm0
18502 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18503 ; SSE2-NEXT: movdqa %xmm0, %xmm2
18504 ; SSE2-NEXT: pand %xmm1, %xmm2
18505 ; SSE2-NEXT: psrlw $2, %xmm0
18506 ; SSE2-NEXT: pand %xmm1, %xmm0
18507 ; SSE2-NEXT: paddb %xmm2, %xmm0
18508 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18509 ; SSE2-NEXT: psrlw $4, %xmm1
18510 ; SSE2-NEXT: paddb %xmm0, %xmm1
18511 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18512 ; SSE2-NEXT: pxor %xmm0, %xmm0
18513 ; SSE2-NEXT: psadbw %xmm1, %xmm0
18514 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
18515 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
18518 ; SSE3-LABEL: ugt_7_v2i64:
18520 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18521 ; SSE3-NEXT: psrlw $1, %xmm1
18522 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18523 ; SSE3-NEXT: psubb %xmm1, %xmm0
18524 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18525 ; SSE3-NEXT: movdqa %xmm0, %xmm2
18526 ; SSE3-NEXT: pand %xmm1, %xmm2
18527 ; SSE3-NEXT: psrlw $2, %xmm0
18528 ; SSE3-NEXT: pand %xmm1, %xmm0
18529 ; SSE3-NEXT: paddb %xmm2, %xmm0
18530 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18531 ; SSE3-NEXT: psrlw $4, %xmm1
18532 ; SSE3-NEXT: paddb %xmm0, %xmm1
18533 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18534 ; SSE3-NEXT: pxor %xmm0, %xmm0
18535 ; SSE3-NEXT: psadbw %xmm1, %xmm0
18536 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
18537 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
18540 ; SSSE3-LABEL: ugt_7_v2i64:
18542 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18543 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
18544 ; SSSE3-NEXT: pand %xmm1, %xmm2
18545 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18546 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
18547 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
18548 ; SSSE3-NEXT: psrlw $4, %xmm0
18549 ; SSSE3-NEXT: pand %xmm1, %xmm0
18550 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
18551 ; SSSE3-NEXT: paddb %xmm4, %xmm3
18552 ; SSSE3-NEXT: pxor %xmm0, %xmm0
18553 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
18554 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
18555 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
18558 ; SSE41-LABEL: ugt_7_v2i64:
18560 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18561 ; SSE41-NEXT: movdqa %xmm0, %xmm2
18562 ; SSE41-NEXT: pand %xmm1, %xmm2
18563 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18564 ; SSE41-NEXT: movdqa %xmm3, %xmm4
18565 ; SSE41-NEXT: pshufb %xmm2, %xmm4
18566 ; SSE41-NEXT: psrlw $4, %xmm0
18567 ; SSE41-NEXT: pand %xmm1, %xmm0
18568 ; SSE41-NEXT: pshufb %xmm0, %xmm3
18569 ; SSE41-NEXT: paddb %xmm4, %xmm3
18570 ; SSE41-NEXT: pxor %xmm0, %xmm0
18571 ; SSE41-NEXT: psadbw %xmm3, %xmm0
18572 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
18573 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
18576 ; AVX1-LABEL: ugt_7_v2i64:
18578 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18579 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
18580 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18581 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18582 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
18583 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
18584 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18585 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18586 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
18587 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18588 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18591 ; AVX2-LABEL: ugt_7_v2i64:
18593 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18594 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
18595 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18596 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18597 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
18598 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
18599 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18600 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18601 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
18602 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18603 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18606 ; AVX512VPOPCNTDQ-LABEL: ugt_7_v2i64:
18607 ; AVX512VPOPCNTDQ: # %bb.0:
18608 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18609 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
18610 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18611 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
18612 ; AVX512VPOPCNTDQ-NEXT: retq
18614 ; AVX512VPOPCNTDQVL-LABEL: ugt_7_v2i64:
18615 ; AVX512VPOPCNTDQVL: # %bb.0:
18616 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
18617 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7]
18618 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
18619 ; AVX512VPOPCNTDQVL-NEXT: retq
18621 ; BITALG_NOVLX-LABEL: ugt_7_v2i64:
18622 ; BITALG_NOVLX: # %bb.0:
18623 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18624 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
18625 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
18626 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18627 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18628 ; BITALG_NOVLX-NEXT: vzeroupper
18629 ; BITALG_NOVLX-NEXT: retq
18631 ; BITALG-LABEL: ugt_7_v2i64:
18633 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
18634 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
18635 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18636 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [7,7]
18637 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
18638 ; BITALG-NEXT: retq
; IR under test - population count, then icmp ugt against splat 7, then sext.
18639 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
18640 %3 = icmp ugt <2 x i64> %2, <i64 7, i64 7>
18641 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_8_v2i64 - per-lane unsigned compare ctpop(%0) < 8 on <2 x i64>, with
; the <2 x i1> compare result sign-extended to <2 x i64>.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
18645 define <2 x i64> @ult_8_v2i64(<2 x i64> %0) {
18646 ; SSE2-LABEL: ult_8_v2i64:
18648 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18649 ; SSE2-NEXT: psrlw $1, %xmm1
18650 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18651 ; SSE2-NEXT: psubb %xmm1, %xmm0
18652 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18653 ; SSE2-NEXT: movdqa %xmm0, %xmm2
18654 ; SSE2-NEXT: pand %xmm1, %xmm2
18655 ; SSE2-NEXT: psrlw $2, %xmm0
18656 ; SSE2-NEXT: pand %xmm1, %xmm0
18657 ; SSE2-NEXT: paddb %xmm2, %xmm0
18658 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18659 ; SSE2-NEXT: psrlw $4, %xmm1
18660 ; SSE2-NEXT: paddb %xmm0, %xmm1
18661 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18662 ; SSE2-NEXT: pxor %xmm0, %xmm0
18663 ; SSE2-NEXT: psadbw %xmm1, %xmm0
18664 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18665 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
18666 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
18669 ; SSE3-LABEL: ult_8_v2i64:
18671 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18672 ; SSE3-NEXT: psrlw $1, %xmm1
18673 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18674 ; SSE3-NEXT: psubb %xmm1, %xmm0
18675 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18676 ; SSE3-NEXT: movdqa %xmm0, %xmm2
18677 ; SSE3-NEXT: pand %xmm1, %xmm2
18678 ; SSE3-NEXT: psrlw $2, %xmm0
18679 ; SSE3-NEXT: pand %xmm1, %xmm0
18680 ; SSE3-NEXT: paddb %xmm2, %xmm0
18681 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18682 ; SSE3-NEXT: psrlw $4, %xmm1
18683 ; SSE3-NEXT: paddb %xmm0, %xmm1
18684 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18685 ; SSE3-NEXT: pxor %xmm0, %xmm0
18686 ; SSE3-NEXT: psadbw %xmm1, %xmm0
18687 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18688 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
18689 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
18692 ; SSSE3-LABEL: ult_8_v2i64:
18694 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18695 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
18696 ; SSSE3-NEXT: pand %xmm1, %xmm2
18697 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18698 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
18699 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
18700 ; SSSE3-NEXT: psrlw $4, %xmm0
18701 ; SSSE3-NEXT: pand %xmm1, %xmm0
18702 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
18703 ; SSSE3-NEXT: paddb %xmm4, %xmm3
18704 ; SSSE3-NEXT: pxor %xmm0, %xmm0
18705 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
18706 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18707 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
18708 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
18711 ; SSE41-LABEL: ult_8_v2i64:
18713 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18714 ; SSE41-NEXT: movdqa %xmm0, %xmm2
18715 ; SSE41-NEXT: pand %xmm1, %xmm2
18716 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18717 ; SSE41-NEXT: movdqa %xmm3, %xmm4
18718 ; SSE41-NEXT: pshufb %xmm2, %xmm4
18719 ; SSE41-NEXT: psrlw $4, %xmm0
18720 ; SSE41-NEXT: pand %xmm1, %xmm0
18721 ; SSE41-NEXT: pshufb %xmm0, %xmm3
18722 ; SSE41-NEXT: paddb %xmm4, %xmm3
18723 ; SSE41-NEXT: pxor %xmm0, %xmm0
18724 ; SSE41-NEXT: psadbw %xmm3, %xmm0
18725 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18726 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [8,8,8,8]
18727 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
18730 ; AVX1-LABEL: ult_8_v2i64:
18732 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18733 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
18734 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18735 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18736 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
18737 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
18738 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18739 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18740 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
18741 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18742 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [8,8]
18743 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18746 ; AVX2-LABEL: ult_8_v2i64:
18748 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18749 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
18750 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18751 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18752 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
18753 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
18754 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18755 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18756 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
18757 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18758 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [8,8]
18759 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18762 ; AVX512VPOPCNTDQ-LABEL: ult_8_v2i64:
18763 ; AVX512VPOPCNTDQ: # %bb.0:
18764 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18765 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
18766 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [8,8]
18767 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18768 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
18769 ; AVX512VPOPCNTDQ-NEXT: retq
18771 ; AVX512VPOPCNTDQVL-LABEL: ult_8_v2i64:
18772 ; AVX512VPOPCNTDQVL: # %bb.0:
18773 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
18774 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8]
18775 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18776 ; AVX512VPOPCNTDQVL-NEXT: retq
18778 ; BITALG_NOVLX-LABEL: ult_8_v2i64:
18779 ; BITALG_NOVLX: # %bb.0:
18780 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18781 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
18782 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
18783 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18784 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [8,8]
18785 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18786 ; BITALG_NOVLX-NEXT: vzeroupper
18787 ; BITALG_NOVLX-NEXT: retq
18789 ; BITALG-LABEL: ult_8_v2i64:
18791 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
18792 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
18793 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18794 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8]
18795 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
18796 ; BITALG-NEXT: retq
; IR under test - population count, then icmp ult against splat 8, then sext.
18797 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
18798 %3 = icmp ult <2 x i64> %2, <i64 8, i64 8>
18799 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_8_v2i64 - per-lane unsigned compare ctpop(%0) > 8 on <2 x i64>, with
; the <2 x i1> compare result sign-extended to <2 x i64>.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
18803 define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) {
18804 ; SSE2-LABEL: ugt_8_v2i64:
18806 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18807 ; SSE2-NEXT: psrlw $1, %xmm1
18808 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18809 ; SSE2-NEXT: psubb %xmm1, %xmm0
18810 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18811 ; SSE2-NEXT: movdqa %xmm0, %xmm2
18812 ; SSE2-NEXT: pand %xmm1, %xmm2
18813 ; SSE2-NEXT: psrlw $2, %xmm0
18814 ; SSE2-NEXT: pand %xmm1, %xmm0
18815 ; SSE2-NEXT: paddb %xmm2, %xmm0
18816 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18817 ; SSE2-NEXT: psrlw $4, %xmm1
18818 ; SSE2-NEXT: paddb %xmm0, %xmm1
18819 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18820 ; SSE2-NEXT: pxor %xmm0, %xmm0
18821 ; SSE2-NEXT: psadbw %xmm1, %xmm0
18822 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
18823 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
18826 ; SSE3-LABEL: ugt_8_v2i64:
18828 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18829 ; SSE3-NEXT: psrlw $1, %xmm1
18830 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18831 ; SSE3-NEXT: psubb %xmm1, %xmm0
18832 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18833 ; SSE3-NEXT: movdqa %xmm0, %xmm2
18834 ; SSE3-NEXT: pand %xmm1, %xmm2
18835 ; SSE3-NEXT: psrlw $2, %xmm0
18836 ; SSE3-NEXT: pand %xmm1, %xmm0
18837 ; SSE3-NEXT: paddb %xmm2, %xmm0
18838 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18839 ; SSE3-NEXT: psrlw $4, %xmm1
18840 ; SSE3-NEXT: paddb %xmm0, %xmm1
18841 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18842 ; SSE3-NEXT: pxor %xmm0, %xmm0
18843 ; SSE3-NEXT: psadbw %xmm1, %xmm0
18844 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
18845 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
18848 ; SSSE3-LABEL: ugt_8_v2i64:
18850 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18851 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
18852 ; SSSE3-NEXT: pand %xmm1, %xmm2
18853 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18854 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
18855 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
18856 ; SSSE3-NEXT: psrlw $4, %xmm0
18857 ; SSSE3-NEXT: pand %xmm1, %xmm0
18858 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
18859 ; SSSE3-NEXT: paddb %xmm4, %xmm3
18860 ; SSSE3-NEXT: pxor %xmm0, %xmm0
18861 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
18862 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
18863 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
18866 ; SSE41-LABEL: ugt_8_v2i64:
18868 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18869 ; SSE41-NEXT: movdqa %xmm0, %xmm2
18870 ; SSE41-NEXT: pand %xmm1, %xmm2
18871 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18872 ; SSE41-NEXT: movdqa %xmm3, %xmm4
18873 ; SSE41-NEXT: pshufb %xmm2, %xmm4
18874 ; SSE41-NEXT: psrlw $4, %xmm0
18875 ; SSE41-NEXT: pand %xmm1, %xmm0
18876 ; SSE41-NEXT: pshufb %xmm0, %xmm3
18877 ; SSE41-NEXT: paddb %xmm4, %xmm3
18878 ; SSE41-NEXT: pxor %xmm0, %xmm0
18879 ; SSE41-NEXT: psadbw %xmm3, %xmm0
18880 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
18881 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
18884 ; AVX1-LABEL: ugt_8_v2i64:
18886 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18887 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
18888 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18889 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18890 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
18891 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
18892 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18893 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18894 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
18895 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18896 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18899 ; AVX2-LABEL: ugt_8_v2i64:
18901 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
18902 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
18903 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
18904 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
18905 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
18906 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
18907 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
18908 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
18909 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
18910 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18911 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18914 ; AVX512VPOPCNTDQ-LABEL: ugt_8_v2i64:
18915 ; AVX512VPOPCNTDQ: # %bb.0:
18916 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18917 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
18918 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18919 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
18920 ; AVX512VPOPCNTDQ-NEXT: retq
18922 ; AVX512VPOPCNTDQVL-LABEL: ugt_8_v2i64:
18923 ; AVX512VPOPCNTDQVL: # %bb.0:
18924 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
18925 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8]
18926 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
18927 ; AVX512VPOPCNTDQVL-NEXT: retq
18929 ; BITALG_NOVLX-LABEL: ugt_8_v2i64:
18930 ; BITALG_NOVLX: # %bb.0:
18931 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
18932 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
18933 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
18934 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18935 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
18936 ; BITALG_NOVLX-NEXT: vzeroupper
18937 ; BITALG_NOVLX-NEXT: retq
18939 ; BITALG-LABEL: ugt_8_v2i64:
18941 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
18942 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
18943 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
18944 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [8,8]
18945 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
18946 ; BITALG-NEXT: retq
; IR under test - population count, then icmp ugt against splat 8, then sext.
18947 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
18948 %3 = icmp ugt <2 x i64> %2, <i64 8, i64 8>
18949 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_9_v2i64 - takes the per-lane population count of the <2 x i64>
; argument via llvm.ctpop.v2i64, tests each count for unsigned-less-than 9,
; and sign-extends the <2 x i1> compare result to <2 x i64>.
; The prefixed assertion lines inside the body are the expected llc output for
; each RUN configuration. Per the note at the top of the file they are
; autogenerated by utils/update_llc_test_checks.py, so regenerate them with
; that script instead of editing them by hand.
18953 define <2 x i64> @ult_9_v2i64(<2 x i64> %0) {
18954 ; SSE2-LABEL: ult_9_v2i64:
18956 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18957 ; SSE2-NEXT: psrlw $1, %xmm1
18958 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18959 ; SSE2-NEXT: psubb %xmm1, %xmm0
18960 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18961 ; SSE2-NEXT: movdqa %xmm0, %xmm2
18962 ; SSE2-NEXT: pand %xmm1, %xmm2
18963 ; SSE2-NEXT: psrlw $2, %xmm0
18964 ; SSE2-NEXT: pand %xmm1, %xmm0
18965 ; SSE2-NEXT: paddb %xmm2, %xmm0
18966 ; SSE2-NEXT: movdqa %xmm0, %xmm1
18967 ; SSE2-NEXT: psrlw $4, %xmm1
18968 ; SSE2-NEXT: paddb %xmm0, %xmm1
18969 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18970 ; SSE2-NEXT: pxor %xmm0, %xmm0
18971 ; SSE2-NEXT: psadbw %xmm1, %xmm0
18972 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18973 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9]
18974 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
18977 ; SSE3-LABEL: ult_9_v2i64:
18979 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18980 ; SSE3-NEXT: psrlw $1, %xmm1
18981 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18982 ; SSE3-NEXT: psubb %xmm1, %xmm0
18983 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
18984 ; SSE3-NEXT: movdqa %xmm0, %xmm2
18985 ; SSE3-NEXT: pand %xmm1, %xmm2
18986 ; SSE3-NEXT: psrlw $2, %xmm0
18987 ; SSE3-NEXT: pand %xmm1, %xmm0
18988 ; SSE3-NEXT: paddb %xmm2, %xmm0
18989 ; SSE3-NEXT: movdqa %xmm0, %xmm1
18990 ; SSE3-NEXT: psrlw $4, %xmm1
18991 ; SSE3-NEXT: paddb %xmm0, %xmm1
18992 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
18993 ; SSE3-NEXT: pxor %xmm0, %xmm0
18994 ; SSE3-NEXT: psadbw %xmm1, %xmm0
18995 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
18996 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9]
18997 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
19000 ; SSSE3-LABEL: ult_9_v2i64:
19002 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19003 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
19004 ; SSSE3-NEXT: pand %xmm1, %xmm2
19005 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19006 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
19007 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
19008 ; SSSE3-NEXT: psrlw $4, %xmm0
19009 ; SSSE3-NEXT: pand %xmm1, %xmm0
19010 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
19011 ; SSSE3-NEXT: paddb %xmm4, %xmm3
19012 ; SSSE3-NEXT: pxor %xmm0, %xmm0
19013 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
19014 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19015 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [9,9,9,9]
19016 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
19019 ; SSE41-LABEL: ult_9_v2i64:
19021 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19022 ; SSE41-NEXT: movdqa %xmm0, %xmm2
19023 ; SSE41-NEXT: pand %xmm1, %xmm2
19024 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19025 ; SSE41-NEXT: movdqa %xmm3, %xmm4
19026 ; SSE41-NEXT: pshufb %xmm2, %xmm4
19027 ; SSE41-NEXT: psrlw $4, %xmm0
19028 ; SSE41-NEXT: pand %xmm1, %xmm0
19029 ; SSE41-NEXT: pshufb %xmm0, %xmm3
19030 ; SSE41-NEXT: paddb %xmm4, %xmm3
19031 ; SSE41-NEXT: pxor %xmm0, %xmm0
19032 ; SSE41-NEXT: psadbw %xmm3, %xmm0
19033 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19034 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [9,9,9,9]
19035 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
19038 ; AVX1-LABEL: ult_9_v2i64:
19040 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19041 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
19042 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19043 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19044 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
19045 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
19046 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19047 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19048 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
19049 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19050 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [9,9]
19051 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19054 ; AVX2-LABEL: ult_9_v2i64:
19056 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19057 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
19058 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19059 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19060 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
19061 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
19062 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19063 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19064 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
19065 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19066 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [9,9]
19067 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19070 ; AVX512VPOPCNTDQ-LABEL: ult_9_v2i64:
19071 ; AVX512VPOPCNTDQ: # %bb.0:
19072 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19073 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
19074 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [9,9]
19075 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19076 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
19077 ; AVX512VPOPCNTDQ-NEXT: retq
19079 ; AVX512VPOPCNTDQVL-LABEL: ult_9_v2i64:
19080 ; AVX512VPOPCNTDQVL: # %bb.0:
19081 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
19082 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9]
19083 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19084 ; AVX512VPOPCNTDQVL-NEXT: retq
19086 ; BITALG_NOVLX-LABEL: ult_9_v2i64:
19087 ; BITALG_NOVLX: # %bb.0:
19088 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19089 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
19090 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
19091 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19092 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [9,9]
19093 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19094 ; BITALG_NOVLX-NEXT: vzeroupper
19095 ; BITALG_NOVLX-NEXT: retq
19097 ; BITALG-LABEL: ult_9_v2i64:
19099 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
19100 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
19101 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19102 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9]
19103 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19104 ; BITALG-NEXT: retq
; IR under test - ctpop of %0, then icmp ult against splat 9, then sext of the mask.
19105 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
19106 %3 = icmp ult <2 x i64> %2, <i64 9, i64 9>
19107 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_9_v2i64 - takes the per-lane population count of the <2 x i64>
; argument via llvm.ctpop.v2i64, tests each count for unsigned-greater-than 9,
; and sign-extends the <2 x i1> compare result to <2 x i64>.
; The prefixed assertion lines inside the body are the expected llc output for
; each RUN configuration. Per the note at the top of the file they are
; autogenerated by utils/update_llc_test_checks.py, so regenerate them with
; that script instead of editing them by hand.
19111 define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) {
19112 ; SSE2-LABEL: ugt_9_v2i64:
19114 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19115 ; SSE2-NEXT: psrlw $1, %xmm1
19116 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19117 ; SSE2-NEXT: psubb %xmm1, %xmm0
19118 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19119 ; SSE2-NEXT: movdqa %xmm0, %xmm2
19120 ; SSE2-NEXT: pand %xmm1, %xmm2
19121 ; SSE2-NEXT: psrlw $2, %xmm0
19122 ; SSE2-NEXT: pand %xmm1, %xmm0
19123 ; SSE2-NEXT: paddb %xmm2, %xmm0
19124 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19125 ; SSE2-NEXT: psrlw $4, %xmm1
19126 ; SSE2-NEXT: paddb %xmm0, %xmm1
19127 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19128 ; SSE2-NEXT: pxor %xmm0, %xmm0
19129 ; SSE2-NEXT: psadbw %xmm1, %xmm0
19130 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
19131 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
19134 ; SSE3-LABEL: ugt_9_v2i64:
19136 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19137 ; SSE3-NEXT: psrlw $1, %xmm1
19138 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19139 ; SSE3-NEXT: psubb %xmm1, %xmm0
19140 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19141 ; SSE3-NEXT: movdqa %xmm0, %xmm2
19142 ; SSE3-NEXT: pand %xmm1, %xmm2
19143 ; SSE3-NEXT: psrlw $2, %xmm0
19144 ; SSE3-NEXT: pand %xmm1, %xmm0
19145 ; SSE3-NEXT: paddb %xmm2, %xmm0
19146 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19147 ; SSE3-NEXT: psrlw $4, %xmm1
19148 ; SSE3-NEXT: paddb %xmm0, %xmm1
19149 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19150 ; SSE3-NEXT: pxor %xmm0, %xmm0
19151 ; SSE3-NEXT: psadbw %xmm1, %xmm0
19152 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
19153 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
19156 ; SSSE3-LABEL: ugt_9_v2i64:
19158 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19159 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
19160 ; SSSE3-NEXT: pand %xmm1, %xmm2
19161 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19162 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
19163 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
19164 ; SSSE3-NEXT: psrlw $4, %xmm0
19165 ; SSSE3-NEXT: pand %xmm1, %xmm0
19166 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
19167 ; SSSE3-NEXT: paddb %xmm4, %xmm3
19168 ; SSSE3-NEXT: pxor %xmm0, %xmm0
19169 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
19170 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
19171 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
19174 ; SSE41-LABEL: ugt_9_v2i64:
19176 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19177 ; SSE41-NEXT: movdqa %xmm0, %xmm2
19178 ; SSE41-NEXT: pand %xmm1, %xmm2
19179 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19180 ; SSE41-NEXT: movdqa %xmm3, %xmm4
19181 ; SSE41-NEXT: pshufb %xmm2, %xmm4
19182 ; SSE41-NEXT: psrlw $4, %xmm0
19183 ; SSE41-NEXT: pand %xmm1, %xmm0
19184 ; SSE41-NEXT: pshufb %xmm0, %xmm3
19185 ; SSE41-NEXT: paddb %xmm4, %xmm3
19186 ; SSE41-NEXT: pxor %xmm0, %xmm0
19187 ; SSE41-NEXT: psadbw %xmm3, %xmm0
19188 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
19189 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
19192 ; AVX1-LABEL: ugt_9_v2i64:
19194 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19195 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
19196 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19197 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19198 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
19199 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
19200 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19201 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19202 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
19203 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19204 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19207 ; AVX2-LABEL: ugt_9_v2i64:
19209 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19210 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
19211 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19212 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19213 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
19214 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
19215 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19216 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19217 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
19218 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19219 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19222 ; AVX512VPOPCNTDQ-LABEL: ugt_9_v2i64:
19223 ; AVX512VPOPCNTDQ: # %bb.0:
19224 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19225 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
19226 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19227 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
19228 ; AVX512VPOPCNTDQ-NEXT: retq
19230 ; AVX512VPOPCNTDQVL-LABEL: ugt_9_v2i64:
19231 ; AVX512VPOPCNTDQVL: # %bb.0:
19232 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
19233 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9]
19234 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
19235 ; AVX512VPOPCNTDQVL-NEXT: retq
19237 ; BITALG_NOVLX-LABEL: ugt_9_v2i64:
19238 ; BITALG_NOVLX: # %bb.0:
19239 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19240 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
19241 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
19242 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19243 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19244 ; BITALG_NOVLX-NEXT: vzeroupper
19245 ; BITALG_NOVLX-NEXT: retq
19247 ; BITALG-LABEL: ugt_9_v2i64:
19249 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
19250 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
19251 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19252 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9,9]
19253 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
19254 ; BITALG-NEXT: retq
; IR under test - ctpop of %0, then icmp ugt against splat 9, then sext of the mask.
19255 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
19256 %3 = icmp ugt <2 x i64> %2, <i64 9, i64 9>
19257 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_10_v2i64 - takes the per-lane population count of the <2 x i64>
; argument via llvm.ctpop.v2i64, tests each count for unsigned-less-than 10,
; and sign-extends the <2 x i1> compare result to <2 x i64>.
; The prefixed assertion lines inside the body are the expected llc output for
; each RUN configuration. Per the note at the top of the file they are
; autogenerated by utils/update_llc_test_checks.py, so regenerate them with
; that script instead of editing them by hand.
19261 define <2 x i64> @ult_10_v2i64(<2 x i64> %0) {
19262 ; SSE2-LABEL: ult_10_v2i64:
19264 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19265 ; SSE2-NEXT: psrlw $1, %xmm1
19266 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19267 ; SSE2-NEXT: psubb %xmm1, %xmm0
19268 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19269 ; SSE2-NEXT: movdqa %xmm0, %xmm2
19270 ; SSE2-NEXT: pand %xmm1, %xmm2
19271 ; SSE2-NEXT: psrlw $2, %xmm0
19272 ; SSE2-NEXT: pand %xmm1, %xmm0
19273 ; SSE2-NEXT: paddb %xmm2, %xmm0
19274 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19275 ; SSE2-NEXT: psrlw $4, %xmm1
19276 ; SSE2-NEXT: paddb %xmm0, %xmm1
19277 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19278 ; SSE2-NEXT: pxor %xmm0, %xmm0
19279 ; SSE2-NEXT: psadbw %xmm1, %xmm0
19280 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19281 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
19282 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
19285 ; SSE3-LABEL: ult_10_v2i64:
19287 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19288 ; SSE3-NEXT: psrlw $1, %xmm1
19289 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19290 ; SSE3-NEXT: psubb %xmm1, %xmm0
19291 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19292 ; SSE3-NEXT: movdqa %xmm0, %xmm2
19293 ; SSE3-NEXT: pand %xmm1, %xmm2
19294 ; SSE3-NEXT: psrlw $2, %xmm0
19295 ; SSE3-NEXT: pand %xmm1, %xmm0
19296 ; SSE3-NEXT: paddb %xmm2, %xmm0
19297 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19298 ; SSE3-NEXT: psrlw $4, %xmm1
19299 ; SSE3-NEXT: paddb %xmm0, %xmm1
19300 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19301 ; SSE3-NEXT: pxor %xmm0, %xmm0
19302 ; SSE3-NEXT: psadbw %xmm1, %xmm0
19303 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19304 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
19305 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
19308 ; SSSE3-LABEL: ult_10_v2i64:
19310 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19311 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
19312 ; SSSE3-NEXT: pand %xmm1, %xmm2
19313 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19314 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
19315 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
19316 ; SSSE3-NEXT: psrlw $4, %xmm0
19317 ; SSSE3-NEXT: pand %xmm1, %xmm0
19318 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
19319 ; SSSE3-NEXT: paddb %xmm4, %xmm3
19320 ; SSSE3-NEXT: pxor %xmm0, %xmm0
19321 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
19322 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19323 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [10,10,10,10]
19324 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
19327 ; SSE41-LABEL: ult_10_v2i64:
19329 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19330 ; SSE41-NEXT: movdqa %xmm0, %xmm2
19331 ; SSE41-NEXT: pand %xmm1, %xmm2
19332 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19333 ; SSE41-NEXT: movdqa %xmm3, %xmm4
19334 ; SSE41-NEXT: pshufb %xmm2, %xmm4
19335 ; SSE41-NEXT: psrlw $4, %xmm0
19336 ; SSE41-NEXT: pand %xmm1, %xmm0
19337 ; SSE41-NEXT: pshufb %xmm0, %xmm3
19338 ; SSE41-NEXT: paddb %xmm4, %xmm3
19339 ; SSE41-NEXT: pxor %xmm0, %xmm0
19340 ; SSE41-NEXT: psadbw %xmm3, %xmm0
19341 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19342 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [10,10,10,10]
19343 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
19346 ; AVX1-LABEL: ult_10_v2i64:
19348 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19349 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
19350 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19351 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19352 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
19353 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
19354 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19355 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19356 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
19357 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19358 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [10,10]
19359 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19362 ; AVX2-LABEL: ult_10_v2i64:
19364 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19365 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
19366 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19367 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19368 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
19369 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
19370 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19371 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19372 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
19373 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19374 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [10,10]
19375 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19378 ; AVX512VPOPCNTDQ-LABEL: ult_10_v2i64:
19379 ; AVX512VPOPCNTDQ: # %bb.0:
19380 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19381 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
19382 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [10,10]
19383 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19384 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
19385 ; AVX512VPOPCNTDQ-NEXT: retq
19387 ; AVX512VPOPCNTDQVL-LABEL: ult_10_v2i64:
19388 ; AVX512VPOPCNTDQVL: # %bb.0:
19389 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
19390 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10]
19391 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19392 ; AVX512VPOPCNTDQVL-NEXT: retq
19394 ; BITALG_NOVLX-LABEL: ult_10_v2i64:
19395 ; BITALG_NOVLX: # %bb.0:
19396 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19397 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
19398 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
19399 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19400 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [10,10]
19401 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19402 ; BITALG_NOVLX-NEXT: vzeroupper
19403 ; BITALG_NOVLX-NEXT: retq
19405 ; BITALG-LABEL: ult_10_v2i64:
19407 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
19408 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
19409 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19410 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10]
19411 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19412 ; BITALG-NEXT: retq
; IR under test - ctpop of %0, then icmp ult against splat 10, then sext of the mask.
19413 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
19414 %3 = icmp ult <2 x i64> %2, <i64 10, i64 10>
19415 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_10_v2i64 - takes the per-lane population count of the <2 x i64>
; argument via llvm.ctpop.v2i64, tests each count for unsigned-greater-than 10,
; and sign-extends the <2 x i1> compare result to <2 x i64>.
; The prefixed assertion lines inside the body are the expected llc output for
; each RUN configuration. Per the note at the top of the file they are
; autogenerated by utils/update_llc_test_checks.py, so regenerate them with
; that script instead of editing them by hand.
19419 define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) {
19420 ; SSE2-LABEL: ugt_10_v2i64:
19422 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19423 ; SSE2-NEXT: psrlw $1, %xmm1
19424 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19425 ; SSE2-NEXT: psubb %xmm1, %xmm0
19426 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19427 ; SSE2-NEXT: movdqa %xmm0, %xmm2
19428 ; SSE2-NEXT: pand %xmm1, %xmm2
19429 ; SSE2-NEXT: psrlw $2, %xmm0
19430 ; SSE2-NEXT: pand %xmm1, %xmm0
19431 ; SSE2-NEXT: paddb %xmm2, %xmm0
19432 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19433 ; SSE2-NEXT: psrlw $4, %xmm1
19434 ; SSE2-NEXT: paddb %xmm0, %xmm1
19435 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19436 ; SSE2-NEXT: pxor %xmm0, %xmm0
19437 ; SSE2-NEXT: psadbw %xmm1, %xmm0
19438 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
19439 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
19442 ; SSE3-LABEL: ugt_10_v2i64:
19444 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19445 ; SSE3-NEXT: psrlw $1, %xmm1
19446 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19447 ; SSE3-NEXT: psubb %xmm1, %xmm0
19448 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19449 ; SSE3-NEXT: movdqa %xmm0, %xmm2
19450 ; SSE3-NEXT: pand %xmm1, %xmm2
19451 ; SSE3-NEXT: psrlw $2, %xmm0
19452 ; SSE3-NEXT: pand %xmm1, %xmm0
19453 ; SSE3-NEXT: paddb %xmm2, %xmm0
19454 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19455 ; SSE3-NEXT: psrlw $4, %xmm1
19456 ; SSE3-NEXT: paddb %xmm0, %xmm1
19457 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19458 ; SSE3-NEXT: pxor %xmm0, %xmm0
19459 ; SSE3-NEXT: psadbw %xmm1, %xmm0
19460 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
19461 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
19464 ; SSSE3-LABEL: ugt_10_v2i64:
19466 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19467 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
19468 ; SSSE3-NEXT: pand %xmm1, %xmm2
19469 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19470 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
19471 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
19472 ; SSSE3-NEXT: psrlw $4, %xmm0
19473 ; SSSE3-NEXT: pand %xmm1, %xmm0
19474 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
19475 ; SSSE3-NEXT: paddb %xmm4, %xmm3
19476 ; SSSE3-NEXT: pxor %xmm0, %xmm0
19477 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
19478 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
19479 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
19482 ; SSE41-LABEL: ugt_10_v2i64:
19484 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19485 ; SSE41-NEXT: movdqa %xmm0, %xmm2
19486 ; SSE41-NEXT: pand %xmm1, %xmm2
19487 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19488 ; SSE41-NEXT: movdqa %xmm3, %xmm4
19489 ; SSE41-NEXT: pshufb %xmm2, %xmm4
19490 ; SSE41-NEXT: psrlw $4, %xmm0
19491 ; SSE41-NEXT: pand %xmm1, %xmm0
19492 ; SSE41-NEXT: pshufb %xmm0, %xmm3
19493 ; SSE41-NEXT: paddb %xmm4, %xmm3
19494 ; SSE41-NEXT: pxor %xmm0, %xmm0
19495 ; SSE41-NEXT: psadbw %xmm3, %xmm0
19496 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
19497 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
19500 ; AVX1-LABEL: ugt_10_v2i64:
19502 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19503 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
19504 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19505 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19506 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
19507 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
19508 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19509 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19510 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
19511 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19512 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19515 ; AVX2-LABEL: ugt_10_v2i64:
19517 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19518 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
19519 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19520 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19521 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
19522 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
19523 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19524 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19525 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
19526 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19527 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19530 ; AVX512VPOPCNTDQ-LABEL: ugt_10_v2i64:
19531 ; AVX512VPOPCNTDQ: # %bb.0:
19532 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19533 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
19534 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19535 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
19536 ; AVX512VPOPCNTDQ-NEXT: retq
19538 ; AVX512VPOPCNTDQVL-LABEL: ugt_10_v2i64:
19539 ; AVX512VPOPCNTDQVL: # %bb.0:
19540 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
19541 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10]
19542 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
19543 ; AVX512VPOPCNTDQVL-NEXT: retq
19545 ; BITALG_NOVLX-LABEL: ugt_10_v2i64:
19546 ; BITALG_NOVLX: # %bb.0:
19547 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19548 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
19549 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
19550 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19551 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19552 ; BITALG_NOVLX-NEXT: vzeroupper
19553 ; BITALG_NOVLX-NEXT: retq
19555 ; BITALG-LABEL: ugt_10_v2i64:
19557 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
19558 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
19559 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19560 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [10,10]
19561 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
19562 ; BITALG-NEXT: retq
; IR under test - ctpop of %0, then icmp ugt against splat 10, then sext of the mask.
19563 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
19564 %3 = icmp ugt <2 x i64> %2, <i64 10, i64 10>
19565 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_11_v2i64 - takes the per-lane population count of the <2 x i64>
; argument via llvm.ctpop.v2i64, tests each count for unsigned-less-than 11,
; and sign-extends the <2 x i1> compare result to <2 x i64>.
; The prefixed assertion lines inside the body are the expected llc output for
; each RUN configuration. Per the note at the top of the file they are
; autogenerated by utils/update_llc_test_checks.py, so regenerate them with
; that script instead of editing them by hand.
19569 define <2 x i64> @ult_11_v2i64(<2 x i64> %0) {
19570 ; SSE2-LABEL: ult_11_v2i64:
19572 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19573 ; SSE2-NEXT: psrlw $1, %xmm1
19574 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19575 ; SSE2-NEXT: psubb %xmm1, %xmm0
19576 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19577 ; SSE2-NEXT: movdqa %xmm0, %xmm2
19578 ; SSE2-NEXT: pand %xmm1, %xmm2
19579 ; SSE2-NEXT: psrlw $2, %xmm0
19580 ; SSE2-NEXT: pand %xmm1, %xmm0
19581 ; SSE2-NEXT: paddb %xmm2, %xmm0
19582 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19583 ; SSE2-NEXT: psrlw $4, %xmm1
19584 ; SSE2-NEXT: paddb %xmm0, %xmm1
19585 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19586 ; SSE2-NEXT: pxor %xmm0, %xmm0
19587 ; SSE2-NEXT: psadbw %xmm1, %xmm0
19588 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19589 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11]
19590 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
19593 ; SSE3-LABEL: ult_11_v2i64:
19595 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19596 ; SSE3-NEXT: psrlw $1, %xmm1
19597 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19598 ; SSE3-NEXT: psubb %xmm1, %xmm0
19599 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19600 ; SSE3-NEXT: movdqa %xmm0, %xmm2
19601 ; SSE3-NEXT: pand %xmm1, %xmm2
19602 ; SSE3-NEXT: psrlw $2, %xmm0
19603 ; SSE3-NEXT: pand %xmm1, %xmm0
19604 ; SSE3-NEXT: paddb %xmm2, %xmm0
19605 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19606 ; SSE3-NEXT: psrlw $4, %xmm1
19607 ; SSE3-NEXT: paddb %xmm0, %xmm1
19608 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19609 ; SSE3-NEXT: pxor %xmm0, %xmm0
19610 ; SSE3-NEXT: psadbw %xmm1, %xmm0
19611 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19612 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11]
19613 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
19616 ; SSSE3-LABEL: ult_11_v2i64:
19618 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19619 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
19620 ; SSSE3-NEXT: pand %xmm1, %xmm2
19621 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19622 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
19623 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
19624 ; SSSE3-NEXT: psrlw $4, %xmm0
19625 ; SSSE3-NEXT: pand %xmm1, %xmm0
19626 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
19627 ; SSSE3-NEXT: paddb %xmm4, %xmm3
19628 ; SSSE3-NEXT: pxor %xmm0, %xmm0
19629 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
19630 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19631 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [11,11,11,11]
19632 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
19635 ; SSE41-LABEL: ult_11_v2i64:
19637 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19638 ; SSE41-NEXT: movdqa %xmm0, %xmm2
19639 ; SSE41-NEXT: pand %xmm1, %xmm2
19640 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19641 ; SSE41-NEXT: movdqa %xmm3, %xmm4
19642 ; SSE41-NEXT: pshufb %xmm2, %xmm4
19643 ; SSE41-NEXT: psrlw $4, %xmm0
19644 ; SSE41-NEXT: pand %xmm1, %xmm0
19645 ; SSE41-NEXT: pshufb %xmm0, %xmm3
19646 ; SSE41-NEXT: paddb %xmm4, %xmm3
19647 ; SSE41-NEXT: pxor %xmm0, %xmm0
19648 ; SSE41-NEXT: psadbw %xmm3, %xmm0
19649 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19650 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [11,11,11,11]
19651 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
19654 ; AVX1-LABEL: ult_11_v2i64:
19656 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19657 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
19658 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19659 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19660 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
19661 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
19662 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19663 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19664 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
19665 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19666 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [11,11]
19667 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19670 ; AVX2-LABEL: ult_11_v2i64:
19672 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19673 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
19674 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19675 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19676 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
19677 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
19678 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19679 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19680 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
19681 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19682 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [11,11]
19683 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19686 ; AVX512VPOPCNTDQ-LABEL: ult_11_v2i64:
19687 ; AVX512VPOPCNTDQ: # %bb.0:
19688 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19689 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
19690 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [11,11]
19691 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19692 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
19693 ; AVX512VPOPCNTDQ-NEXT: retq
19695 ; AVX512VPOPCNTDQVL-LABEL: ult_11_v2i64:
19696 ; AVX512VPOPCNTDQVL: # %bb.0:
19697 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
19698 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11]
19699 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19700 ; AVX512VPOPCNTDQVL-NEXT: retq
19702 ; BITALG_NOVLX-LABEL: ult_11_v2i64:
19703 ; BITALG_NOVLX: # %bb.0:
19704 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19705 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
19706 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
19707 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19708 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [11,11]
19709 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19710 ; BITALG_NOVLX-NEXT: vzeroupper
19711 ; BITALG_NOVLX-NEXT: retq
19713 ; BITALG-LABEL: ult_11_v2i64:
19715 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
19716 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
19717 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19718 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11]
19719 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19720 ; BITALG-NEXT: retq
; IR under test - ctpop of %0, then icmp ult against splat 11, then sext of the mask.
19721 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
19722 %3 = icmp ult <2 x i64> %2, <i64 11, i64 11>
19723 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test for a per-lane population count on <2 x i64> followed by an
; unsigned greater-than compare against 11, with the <2 x i1> compare result
; sign-extended to <2 x i64>.
; In the expectations for the pre-AVX targets the compare is emitted as a
; 32-bit pcmpgtd on dwords 0 and 2 duplicated by pshufd; the AVX and later
; expectations use vpcmpgtq.
; The assertions below are autogenerated; rerun
; utils/update_llc_test_checks.py instead of editing them by hand.
19727 define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) {
19728 ; SSE2-LABEL: ugt_11_v2i64:
19730 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19731 ; SSE2-NEXT: psrlw $1, %xmm1
19732 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19733 ; SSE2-NEXT: psubb %xmm1, %xmm0
19734 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19735 ; SSE2-NEXT: movdqa %xmm0, %xmm2
19736 ; SSE2-NEXT: pand %xmm1, %xmm2
19737 ; SSE2-NEXT: psrlw $2, %xmm0
19738 ; SSE2-NEXT: pand %xmm1, %xmm0
19739 ; SSE2-NEXT: paddb %xmm2, %xmm0
19740 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19741 ; SSE2-NEXT: psrlw $4, %xmm1
19742 ; SSE2-NEXT: paddb %xmm0, %xmm1
19743 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19744 ; SSE2-NEXT: pxor %xmm0, %xmm0
19745 ; SSE2-NEXT: psadbw %xmm1, %xmm0
19746 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
19747 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
19750 ; SSE3-LABEL: ugt_11_v2i64:
19752 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19753 ; SSE3-NEXT: psrlw $1, %xmm1
19754 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19755 ; SSE3-NEXT: psubb %xmm1, %xmm0
19756 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19757 ; SSE3-NEXT: movdqa %xmm0, %xmm2
19758 ; SSE3-NEXT: pand %xmm1, %xmm2
19759 ; SSE3-NEXT: psrlw $2, %xmm0
19760 ; SSE3-NEXT: pand %xmm1, %xmm0
19761 ; SSE3-NEXT: paddb %xmm2, %xmm0
19762 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19763 ; SSE3-NEXT: psrlw $4, %xmm1
19764 ; SSE3-NEXT: paddb %xmm0, %xmm1
19765 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19766 ; SSE3-NEXT: pxor %xmm0, %xmm0
19767 ; SSE3-NEXT: psadbw %xmm1, %xmm0
19768 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
19769 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
19772 ; SSSE3-LABEL: ugt_11_v2i64:
19774 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19775 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
19776 ; SSSE3-NEXT: pand %xmm1, %xmm2
19777 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19778 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
19779 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
19780 ; SSSE3-NEXT: psrlw $4, %xmm0
19781 ; SSSE3-NEXT: pand %xmm1, %xmm0
19782 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
19783 ; SSSE3-NEXT: paddb %xmm4, %xmm3
19784 ; SSSE3-NEXT: pxor %xmm0, %xmm0
19785 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
19786 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
19787 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
19790 ; SSE41-LABEL: ugt_11_v2i64:
19792 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19793 ; SSE41-NEXT: movdqa %xmm0, %xmm2
19794 ; SSE41-NEXT: pand %xmm1, %xmm2
19795 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19796 ; SSE41-NEXT: movdqa %xmm3, %xmm4
19797 ; SSE41-NEXT: pshufb %xmm2, %xmm4
19798 ; SSE41-NEXT: psrlw $4, %xmm0
19799 ; SSE41-NEXT: pand %xmm1, %xmm0
19800 ; SSE41-NEXT: pshufb %xmm0, %xmm3
19801 ; SSE41-NEXT: paddb %xmm4, %xmm3
19802 ; SSE41-NEXT: pxor %xmm0, %xmm0
19803 ; SSE41-NEXT: psadbw %xmm3, %xmm0
19804 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
19805 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
19808 ; AVX1-LABEL: ugt_11_v2i64:
19810 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19811 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
19812 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19813 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19814 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
19815 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
19816 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19817 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19818 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
19819 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19820 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19823 ; AVX2-LABEL: ugt_11_v2i64:
19825 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19826 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
19827 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19828 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19829 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
19830 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
19831 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19832 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19833 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
19834 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19835 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19838 ; AVX512VPOPCNTDQ-LABEL: ugt_11_v2i64:
19839 ; AVX512VPOPCNTDQ: # %bb.0:
19840 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19841 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
19842 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19843 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
19844 ; AVX512VPOPCNTDQ-NEXT: retq
19846 ; AVX512VPOPCNTDQVL-LABEL: ugt_11_v2i64:
19847 ; AVX512VPOPCNTDQVL: # %bb.0:
19848 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
19849 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11]
19850 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
19851 ; AVX512VPOPCNTDQVL-NEXT: retq
19853 ; BITALG_NOVLX-LABEL: ugt_11_v2i64:
19854 ; BITALG_NOVLX: # %bb.0:
19855 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19856 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
19857 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
19858 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19859 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
19860 ; BITALG_NOVLX-NEXT: vzeroupper
19861 ; BITALG_NOVLX-NEXT: retq
19863 ; BITALG-LABEL: ugt_11_v2i64:
19865 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
19866 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
19867 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19868 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [11,11]
19869 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
19870 ; BITALG-NEXT: retq
; IR under test (ctpop per lane, icmp ugt against splat 11, sext of the lane mask).
19871 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
19872 %3 = icmp ugt <2 x i64> %2, <i64 11, i64 11>
19873 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test for a per-lane population count on <2 x i64> followed by an
; unsigned less-than compare against 12, with the <2 x i1> compare result
; sign-extended to <2 x i64>.
; In every expectation below the splat constant 12 is loaded into a register
; and used as the left-hand operand of a signed greater-than compare
; (pcmpgtd on duplicated dwords before AVX, vpcmpgtq from AVX on).
; The assertions below are autogenerated; rerun
; utils/update_llc_test_checks.py instead of editing them by hand.
19877 define <2 x i64> @ult_12_v2i64(<2 x i64> %0) {
19878 ; SSE2-LABEL: ult_12_v2i64:
19880 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19881 ; SSE2-NEXT: psrlw $1, %xmm1
19882 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19883 ; SSE2-NEXT: psubb %xmm1, %xmm0
19884 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19885 ; SSE2-NEXT: movdqa %xmm0, %xmm2
19886 ; SSE2-NEXT: pand %xmm1, %xmm2
19887 ; SSE2-NEXT: psrlw $2, %xmm0
19888 ; SSE2-NEXT: pand %xmm1, %xmm0
19889 ; SSE2-NEXT: paddb %xmm2, %xmm0
19890 ; SSE2-NEXT: movdqa %xmm0, %xmm1
19891 ; SSE2-NEXT: psrlw $4, %xmm1
19892 ; SSE2-NEXT: paddb %xmm0, %xmm1
19893 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19894 ; SSE2-NEXT: pxor %xmm0, %xmm0
19895 ; SSE2-NEXT: psadbw %xmm1, %xmm0
19896 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19897 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12]
19898 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
19901 ; SSE3-LABEL: ult_12_v2i64:
19903 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19904 ; SSE3-NEXT: psrlw $1, %xmm1
19905 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19906 ; SSE3-NEXT: psubb %xmm1, %xmm0
19907 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
19908 ; SSE3-NEXT: movdqa %xmm0, %xmm2
19909 ; SSE3-NEXT: pand %xmm1, %xmm2
19910 ; SSE3-NEXT: psrlw $2, %xmm0
19911 ; SSE3-NEXT: pand %xmm1, %xmm0
19912 ; SSE3-NEXT: paddb %xmm2, %xmm0
19913 ; SSE3-NEXT: movdqa %xmm0, %xmm1
19914 ; SSE3-NEXT: psrlw $4, %xmm1
19915 ; SSE3-NEXT: paddb %xmm0, %xmm1
19916 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
19917 ; SSE3-NEXT: pxor %xmm0, %xmm0
19918 ; SSE3-NEXT: psadbw %xmm1, %xmm0
19919 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19920 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12]
19921 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
19924 ; SSSE3-LABEL: ult_12_v2i64:
19926 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19927 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
19928 ; SSSE3-NEXT: pand %xmm1, %xmm2
19929 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19930 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
19931 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
19932 ; SSSE3-NEXT: psrlw $4, %xmm0
19933 ; SSSE3-NEXT: pand %xmm1, %xmm0
19934 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
19935 ; SSSE3-NEXT: paddb %xmm4, %xmm3
19936 ; SSSE3-NEXT: pxor %xmm0, %xmm0
19937 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
19938 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19939 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [12,12,12,12]
19940 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
19943 ; SSE41-LABEL: ult_12_v2i64:
19945 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19946 ; SSE41-NEXT: movdqa %xmm0, %xmm2
19947 ; SSE41-NEXT: pand %xmm1, %xmm2
19948 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19949 ; SSE41-NEXT: movdqa %xmm3, %xmm4
19950 ; SSE41-NEXT: pshufb %xmm2, %xmm4
19951 ; SSE41-NEXT: psrlw $4, %xmm0
19952 ; SSE41-NEXT: pand %xmm1, %xmm0
19953 ; SSE41-NEXT: pshufb %xmm0, %xmm3
19954 ; SSE41-NEXT: paddb %xmm4, %xmm3
19955 ; SSE41-NEXT: pxor %xmm0, %xmm0
19956 ; SSE41-NEXT: psadbw %xmm3, %xmm0
19957 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
19958 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [12,12,12,12]
19959 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
19962 ; AVX1-LABEL: ult_12_v2i64:
19964 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19965 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
19966 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19967 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19968 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
19969 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
19970 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19971 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19972 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
19973 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19974 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [12,12]
19975 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19978 ; AVX2-LABEL: ult_12_v2i64:
19980 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
19981 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
19982 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
19983 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
19984 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
19985 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
19986 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
19987 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
19988 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
19989 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
19990 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [12,12]
19991 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
19994 ; AVX512VPOPCNTDQ-LABEL: ult_12_v2i64:
19995 ; AVX512VPOPCNTDQ: # %bb.0:
19996 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
19997 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
19998 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [12,12]
19999 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20000 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
20001 ; AVX512VPOPCNTDQ-NEXT: retq
20003 ; AVX512VPOPCNTDQVL-LABEL: ult_12_v2i64:
20004 ; AVX512VPOPCNTDQVL: # %bb.0:
20005 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
20006 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12]
20007 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20008 ; AVX512VPOPCNTDQVL-NEXT: retq
20010 ; BITALG_NOVLX-LABEL: ult_12_v2i64:
20011 ; BITALG_NOVLX: # %bb.0:
20012 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20013 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
20014 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
20015 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20016 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [12,12]
20017 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20018 ; BITALG_NOVLX-NEXT: vzeroupper
20019 ; BITALG_NOVLX-NEXT: retq
20021 ; BITALG-LABEL: ult_12_v2i64:
20023 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
20024 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
20025 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20026 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12]
20027 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20028 ; BITALG-NEXT: retq
; IR under test (ctpop per lane, icmp ult against splat 12, sext of the lane mask).
20029 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
20030 %3 = icmp ult <2 x i64> %2, <i64 12, i64 12>
20031 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test for a per-lane population count on <2 x i64> followed by an
; unsigned greater-than compare against 12, with the <2 x i1> compare result
; sign-extended to <2 x i64>.
; In the expectations for the pre-AVX targets the compare is emitted as a
; 32-bit pcmpgtd on dwords 0 and 2 duplicated by pshufd; the AVX and later
; expectations use vpcmpgtq.
; The assertions below are autogenerated; rerun
; utils/update_llc_test_checks.py instead of editing them by hand.
20035 define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) {
20036 ; SSE2-LABEL: ugt_12_v2i64:
20038 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20039 ; SSE2-NEXT: psrlw $1, %xmm1
20040 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20041 ; SSE2-NEXT: psubb %xmm1, %xmm0
20042 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20043 ; SSE2-NEXT: movdqa %xmm0, %xmm2
20044 ; SSE2-NEXT: pand %xmm1, %xmm2
20045 ; SSE2-NEXT: psrlw $2, %xmm0
20046 ; SSE2-NEXT: pand %xmm1, %xmm0
20047 ; SSE2-NEXT: paddb %xmm2, %xmm0
20048 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20049 ; SSE2-NEXT: psrlw $4, %xmm1
20050 ; SSE2-NEXT: paddb %xmm0, %xmm1
20051 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20052 ; SSE2-NEXT: pxor %xmm0, %xmm0
20053 ; SSE2-NEXT: psadbw %xmm1, %xmm0
20054 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
20055 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20058 ; SSE3-LABEL: ugt_12_v2i64:
20060 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20061 ; SSE3-NEXT: psrlw $1, %xmm1
20062 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20063 ; SSE3-NEXT: psubb %xmm1, %xmm0
20064 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20065 ; SSE3-NEXT: movdqa %xmm0, %xmm2
20066 ; SSE3-NEXT: pand %xmm1, %xmm2
20067 ; SSE3-NEXT: psrlw $2, %xmm0
20068 ; SSE3-NEXT: pand %xmm1, %xmm0
20069 ; SSE3-NEXT: paddb %xmm2, %xmm0
20070 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20071 ; SSE3-NEXT: psrlw $4, %xmm1
20072 ; SSE3-NEXT: paddb %xmm0, %xmm1
20073 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20074 ; SSE3-NEXT: pxor %xmm0, %xmm0
20075 ; SSE3-NEXT: psadbw %xmm1, %xmm0
20076 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
20077 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20080 ; SSSE3-LABEL: ugt_12_v2i64:
20082 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20083 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
20084 ; SSSE3-NEXT: pand %xmm1, %xmm2
20085 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20086 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
20087 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
20088 ; SSSE3-NEXT: psrlw $4, %xmm0
20089 ; SSSE3-NEXT: pand %xmm1, %xmm0
20090 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
20091 ; SSSE3-NEXT: paddb %xmm4, %xmm3
20092 ; SSSE3-NEXT: pxor %xmm0, %xmm0
20093 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
20094 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
20095 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20098 ; SSE41-LABEL: ugt_12_v2i64:
20100 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20101 ; SSE41-NEXT: movdqa %xmm0, %xmm2
20102 ; SSE41-NEXT: pand %xmm1, %xmm2
20103 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20104 ; SSE41-NEXT: movdqa %xmm3, %xmm4
20105 ; SSE41-NEXT: pshufb %xmm2, %xmm4
20106 ; SSE41-NEXT: psrlw $4, %xmm0
20107 ; SSE41-NEXT: pand %xmm1, %xmm0
20108 ; SSE41-NEXT: pshufb %xmm0, %xmm3
20109 ; SSE41-NEXT: paddb %xmm4, %xmm3
20110 ; SSE41-NEXT: pxor %xmm0, %xmm0
20111 ; SSE41-NEXT: psadbw %xmm3, %xmm0
20112 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
20113 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20116 ; AVX1-LABEL: ugt_12_v2i64:
20118 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20119 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
20120 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20121 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20122 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
20123 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
20124 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20125 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20126 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
20127 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20128 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20131 ; AVX2-LABEL: ugt_12_v2i64:
20133 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20134 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
20135 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20136 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20137 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
20138 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
20139 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20140 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20141 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
20142 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20143 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20146 ; AVX512VPOPCNTDQ-LABEL: ugt_12_v2i64:
20147 ; AVX512VPOPCNTDQ: # %bb.0:
20148 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20149 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
20150 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20151 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
20152 ; AVX512VPOPCNTDQ-NEXT: retq
20154 ; AVX512VPOPCNTDQVL-LABEL: ugt_12_v2i64:
20155 ; AVX512VPOPCNTDQVL: # %bb.0:
20156 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
20157 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12]
20158 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
20159 ; AVX512VPOPCNTDQVL-NEXT: retq
20161 ; BITALG_NOVLX-LABEL: ugt_12_v2i64:
20162 ; BITALG_NOVLX: # %bb.0:
20163 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20164 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
20165 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
20166 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20167 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20168 ; BITALG_NOVLX-NEXT: vzeroupper
20169 ; BITALG_NOVLX-NEXT: retq
20171 ; BITALG-LABEL: ugt_12_v2i64:
20173 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
20174 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
20175 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20176 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [12,12]
20177 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
20178 ; BITALG-NEXT: retq
; IR under test (ctpop per lane, icmp ugt against splat 12, sext of the lane mask).
20179 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
20180 %3 = icmp ugt <2 x i64> %2, <i64 12, i64 12>
20181 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test for a per-lane population count on <2 x i64> followed by an
; unsigned less-than compare against 13, with the <2 x i1> compare result
; sign-extended to <2 x i64>.
; In every expectation below the splat constant 13 is loaded into a register
; and used as the left-hand operand of a signed greater-than compare
; (pcmpgtd on duplicated dwords before AVX, vpcmpgtq from AVX on).
; The assertions below are autogenerated; rerun
; utils/update_llc_test_checks.py instead of editing them by hand.
20185 define <2 x i64> @ult_13_v2i64(<2 x i64> %0) {
20186 ; SSE2-LABEL: ult_13_v2i64:
20188 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20189 ; SSE2-NEXT: psrlw $1, %xmm1
20190 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20191 ; SSE2-NEXT: psubb %xmm1, %xmm0
20192 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20193 ; SSE2-NEXT: movdqa %xmm0, %xmm2
20194 ; SSE2-NEXT: pand %xmm1, %xmm2
20195 ; SSE2-NEXT: psrlw $2, %xmm0
20196 ; SSE2-NEXT: pand %xmm1, %xmm0
20197 ; SSE2-NEXT: paddb %xmm2, %xmm0
20198 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20199 ; SSE2-NEXT: psrlw $4, %xmm1
20200 ; SSE2-NEXT: paddb %xmm0, %xmm1
20201 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20202 ; SSE2-NEXT: pxor %xmm0, %xmm0
20203 ; SSE2-NEXT: psadbw %xmm1, %xmm0
20204 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20205 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13]
20206 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
20209 ; SSE3-LABEL: ult_13_v2i64:
20211 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20212 ; SSE3-NEXT: psrlw $1, %xmm1
20213 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20214 ; SSE3-NEXT: psubb %xmm1, %xmm0
20215 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20216 ; SSE3-NEXT: movdqa %xmm0, %xmm2
20217 ; SSE3-NEXT: pand %xmm1, %xmm2
20218 ; SSE3-NEXT: psrlw $2, %xmm0
20219 ; SSE3-NEXT: pand %xmm1, %xmm0
20220 ; SSE3-NEXT: paddb %xmm2, %xmm0
20221 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20222 ; SSE3-NEXT: psrlw $4, %xmm1
20223 ; SSE3-NEXT: paddb %xmm0, %xmm1
20224 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20225 ; SSE3-NEXT: pxor %xmm0, %xmm0
20226 ; SSE3-NEXT: psadbw %xmm1, %xmm0
20227 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20228 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13]
20229 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
20232 ; SSSE3-LABEL: ult_13_v2i64:
20234 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20235 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
20236 ; SSSE3-NEXT: pand %xmm1, %xmm2
20237 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20238 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
20239 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
20240 ; SSSE3-NEXT: psrlw $4, %xmm0
20241 ; SSSE3-NEXT: pand %xmm1, %xmm0
20242 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
20243 ; SSSE3-NEXT: paddb %xmm4, %xmm3
20244 ; SSSE3-NEXT: pxor %xmm0, %xmm0
20245 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
20246 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20247 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [13,13,13,13]
20248 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
20251 ; SSE41-LABEL: ult_13_v2i64:
20253 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20254 ; SSE41-NEXT: movdqa %xmm0, %xmm2
20255 ; SSE41-NEXT: pand %xmm1, %xmm2
20256 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20257 ; SSE41-NEXT: movdqa %xmm3, %xmm4
20258 ; SSE41-NEXT: pshufb %xmm2, %xmm4
20259 ; SSE41-NEXT: psrlw $4, %xmm0
20260 ; SSE41-NEXT: pand %xmm1, %xmm0
20261 ; SSE41-NEXT: pshufb %xmm0, %xmm3
20262 ; SSE41-NEXT: paddb %xmm4, %xmm3
20263 ; SSE41-NEXT: pxor %xmm0, %xmm0
20264 ; SSE41-NEXT: psadbw %xmm3, %xmm0
20265 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20266 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [13,13,13,13]
20267 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
20270 ; AVX1-LABEL: ult_13_v2i64:
20272 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20273 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
20274 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20275 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20276 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
20277 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
20278 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20279 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20280 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
20281 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20282 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [13,13]
20283 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20286 ; AVX2-LABEL: ult_13_v2i64:
20288 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20289 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
20290 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20291 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20292 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
20293 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
20294 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20295 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20296 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
20297 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20298 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [13,13]
20299 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20302 ; AVX512VPOPCNTDQ-LABEL: ult_13_v2i64:
20303 ; AVX512VPOPCNTDQ: # %bb.0:
20304 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20305 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
20306 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [13,13]
20307 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20308 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
20309 ; AVX512VPOPCNTDQ-NEXT: retq
20311 ; AVX512VPOPCNTDQVL-LABEL: ult_13_v2i64:
20312 ; AVX512VPOPCNTDQVL: # %bb.0:
20313 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
20314 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13]
20315 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20316 ; AVX512VPOPCNTDQVL-NEXT: retq
20318 ; BITALG_NOVLX-LABEL: ult_13_v2i64:
20319 ; BITALG_NOVLX: # %bb.0:
20320 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20321 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
20322 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
20323 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20324 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [13,13]
20325 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20326 ; BITALG_NOVLX-NEXT: vzeroupper
20327 ; BITALG_NOVLX-NEXT: retq
20329 ; BITALG-LABEL: ult_13_v2i64:
20331 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
20332 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
20333 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20334 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13]
20335 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20336 ; BITALG-NEXT: retq
; IR under test (ctpop per lane, icmp ult against splat 13, sext of the lane mask).
20337 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
20338 %3 = icmp ult <2 x i64> %2, <i64 13, i64 13>
20339 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test for a per-lane population count on <2 x i64> followed by an
; unsigned greater-than compare against 13, with the <2 x i1> compare result
; sign-extended to <2 x i64>.
; In the expectations for the pre-AVX targets the compare is emitted as a
; 32-bit pcmpgtd on dwords 0 and 2 duplicated by pshufd; the AVX and later
; expectations use vpcmpgtq.
; The assertions below are autogenerated; rerun
; utils/update_llc_test_checks.py instead of editing them by hand.
20343 define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) {
20344 ; SSE2-LABEL: ugt_13_v2i64:
20346 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20347 ; SSE2-NEXT: psrlw $1, %xmm1
20348 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20349 ; SSE2-NEXT: psubb %xmm1, %xmm0
20350 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20351 ; SSE2-NEXT: movdqa %xmm0, %xmm2
20352 ; SSE2-NEXT: pand %xmm1, %xmm2
20353 ; SSE2-NEXT: psrlw $2, %xmm0
20354 ; SSE2-NEXT: pand %xmm1, %xmm0
20355 ; SSE2-NEXT: paddb %xmm2, %xmm0
20356 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20357 ; SSE2-NEXT: psrlw $4, %xmm1
20358 ; SSE2-NEXT: paddb %xmm0, %xmm1
20359 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20360 ; SSE2-NEXT: pxor %xmm0, %xmm0
20361 ; SSE2-NEXT: psadbw %xmm1, %xmm0
20362 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
20363 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20366 ; SSE3-LABEL: ugt_13_v2i64:
20368 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20369 ; SSE3-NEXT: psrlw $1, %xmm1
20370 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20371 ; SSE3-NEXT: psubb %xmm1, %xmm0
20372 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20373 ; SSE3-NEXT: movdqa %xmm0, %xmm2
20374 ; SSE3-NEXT: pand %xmm1, %xmm2
20375 ; SSE3-NEXT: psrlw $2, %xmm0
20376 ; SSE3-NEXT: pand %xmm1, %xmm0
20377 ; SSE3-NEXT: paddb %xmm2, %xmm0
20378 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20379 ; SSE3-NEXT: psrlw $4, %xmm1
20380 ; SSE3-NEXT: paddb %xmm0, %xmm1
20381 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20382 ; SSE3-NEXT: pxor %xmm0, %xmm0
20383 ; SSE3-NEXT: psadbw %xmm1, %xmm0
20384 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
20385 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20388 ; SSSE3-LABEL: ugt_13_v2i64:
20390 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20391 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
20392 ; SSSE3-NEXT: pand %xmm1, %xmm2
20393 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20394 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
20395 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
20396 ; SSSE3-NEXT: psrlw $4, %xmm0
20397 ; SSSE3-NEXT: pand %xmm1, %xmm0
20398 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
20399 ; SSSE3-NEXT: paddb %xmm4, %xmm3
20400 ; SSSE3-NEXT: pxor %xmm0, %xmm0
20401 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
20402 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
20403 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20406 ; SSE41-LABEL: ugt_13_v2i64:
20408 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20409 ; SSE41-NEXT: movdqa %xmm0, %xmm2
20410 ; SSE41-NEXT: pand %xmm1, %xmm2
20411 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20412 ; SSE41-NEXT: movdqa %xmm3, %xmm4
20413 ; SSE41-NEXT: pshufb %xmm2, %xmm4
20414 ; SSE41-NEXT: psrlw $4, %xmm0
20415 ; SSE41-NEXT: pand %xmm1, %xmm0
20416 ; SSE41-NEXT: pshufb %xmm0, %xmm3
20417 ; SSE41-NEXT: paddb %xmm4, %xmm3
20418 ; SSE41-NEXT: pxor %xmm0, %xmm0
20419 ; SSE41-NEXT: psadbw %xmm3, %xmm0
20420 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
20421 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20424 ; AVX1-LABEL: ugt_13_v2i64:
20426 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20427 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
20428 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20429 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20430 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
20431 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
20432 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20433 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20434 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
20435 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20436 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20439 ; AVX2-LABEL: ugt_13_v2i64:
20441 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20442 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
20443 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20444 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20445 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
20446 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
20447 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20448 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20449 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
20450 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20451 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20454 ; AVX512VPOPCNTDQ-LABEL: ugt_13_v2i64:
20455 ; AVX512VPOPCNTDQ: # %bb.0:
20456 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20457 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
20458 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20459 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
20460 ; AVX512VPOPCNTDQ-NEXT: retq
20462 ; AVX512VPOPCNTDQVL-LABEL: ugt_13_v2i64:
20463 ; AVX512VPOPCNTDQVL: # %bb.0:
20464 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
20465 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13]
20466 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
20467 ; AVX512VPOPCNTDQVL-NEXT: retq
20469 ; BITALG_NOVLX-LABEL: ugt_13_v2i64:
20470 ; BITALG_NOVLX: # %bb.0:
20471 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20472 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
20473 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
20474 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20475 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20476 ; BITALG_NOVLX-NEXT: vzeroupper
20477 ; BITALG_NOVLX-NEXT: retq
20479 ; BITALG-LABEL: ugt_13_v2i64:
20481 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
20482 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
20483 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20484 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [13,13]
20485 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
20486 ; BITALG-NEXT: retq
; IR under test (ctpop per lane, icmp ugt against splat 13, sext of the lane mask).
20487 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
20488 %3 = icmp ugt <2 x i64> %2, <i64 13, i64 13>
20489 %4 = sext <2 x i1> %3 to <2 x i64>
20493 define <2 x i64> @ult_14_v2i64(<2 x i64> %0) {
20494 ; SSE2-LABEL: ult_14_v2i64:
20496 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20497 ; SSE2-NEXT: psrlw $1, %xmm1
20498 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20499 ; SSE2-NEXT: psubb %xmm1, %xmm0
20500 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20501 ; SSE2-NEXT: movdqa %xmm0, %xmm2
20502 ; SSE2-NEXT: pand %xmm1, %xmm2
20503 ; SSE2-NEXT: psrlw $2, %xmm0
20504 ; SSE2-NEXT: pand %xmm1, %xmm0
20505 ; SSE2-NEXT: paddb %xmm2, %xmm0
20506 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20507 ; SSE2-NEXT: psrlw $4, %xmm1
20508 ; SSE2-NEXT: paddb %xmm0, %xmm1
20509 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20510 ; SSE2-NEXT: pxor %xmm0, %xmm0
20511 ; SSE2-NEXT: psadbw %xmm1, %xmm0
20512 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20513 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14]
20514 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
20517 ; SSE3-LABEL: ult_14_v2i64:
20519 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20520 ; SSE3-NEXT: psrlw $1, %xmm1
20521 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20522 ; SSE3-NEXT: psubb %xmm1, %xmm0
20523 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20524 ; SSE3-NEXT: movdqa %xmm0, %xmm2
20525 ; SSE3-NEXT: pand %xmm1, %xmm2
20526 ; SSE3-NEXT: psrlw $2, %xmm0
20527 ; SSE3-NEXT: pand %xmm1, %xmm0
20528 ; SSE3-NEXT: paddb %xmm2, %xmm0
20529 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20530 ; SSE3-NEXT: psrlw $4, %xmm1
20531 ; SSE3-NEXT: paddb %xmm0, %xmm1
20532 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20533 ; SSE3-NEXT: pxor %xmm0, %xmm0
20534 ; SSE3-NEXT: psadbw %xmm1, %xmm0
20535 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20536 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14]
20537 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
20540 ; SSSE3-LABEL: ult_14_v2i64:
20542 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20543 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
20544 ; SSSE3-NEXT: pand %xmm1, %xmm2
20545 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20546 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
20547 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
20548 ; SSSE3-NEXT: psrlw $4, %xmm0
20549 ; SSSE3-NEXT: pand %xmm1, %xmm0
20550 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
20551 ; SSSE3-NEXT: paddb %xmm4, %xmm3
20552 ; SSSE3-NEXT: pxor %xmm0, %xmm0
20553 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
20554 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20555 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [14,14,14,14]
20556 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
20559 ; SSE41-LABEL: ult_14_v2i64:
20561 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20562 ; SSE41-NEXT: movdqa %xmm0, %xmm2
20563 ; SSE41-NEXT: pand %xmm1, %xmm2
20564 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20565 ; SSE41-NEXT: movdqa %xmm3, %xmm4
20566 ; SSE41-NEXT: pshufb %xmm2, %xmm4
20567 ; SSE41-NEXT: psrlw $4, %xmm0
20568 ; SSE41-NEXT: pand %xmm1, %xmm0
20569 ; SSE41-NEXT: pshufb %xmm0, %xmm3
20570 ; SSE41-NEXT: paddb %xmm4, %xmm3
20571 ; SSE41-NEXT: pxor %xmm0, %xmm0
20572 ; SSE41-NEXT: psadbw %xmm3, %xmm0
20573 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20574 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [14,14,14,14]
20575 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
20578 ; AVX1-LABEL: ult_14_v2i64:
20580 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20581 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
20582 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20583 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20584 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
20585 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
20586 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20587 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20588 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
20589 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20590 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [14,14]
20591 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20594 ; AVX2-LABEL: ult_14_v2i64:
20596 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20597 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
20598 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20599 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20600 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
20601 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
20602 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20603 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20604 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
20605 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20606 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [14,14]
20607 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20610 ; AVX512VPOPCNTDQ-LABEL: ult_14_v2i64:
20611 ; AVX512VPOPCNTDQ: # %bb.0:
20612 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20613 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
20614 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [14,14]
20615 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20616 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
20617 ; AVX512VPOPCNTDQ-NEXT: retq
20619 ; AVX512VPOPCNTDQVL-LABEL: ult_14_v2i64:
20620 ; AVX512VPOPCNTDQVL: # %bb.0:
20621 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
20622 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14]
20623 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20624 ; AVX512VPOPCNTDQVL-NEXT: retq
20626 ; BITALG_NOVLX-LABEL: ult_14_v2i64:
20627 ; BITALG_NOVLX: # %bb.0:
20628 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20629 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
20630 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
20631 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20632 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [14,14]
20633 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20634 ; BITALG_NOVLX-NEXT: vzeroupper
20635 ; BITALG_NOVLX-NEXT: retq
20637 ; BITALG-LABEL: ult_14_v2i64:
20639 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
20640 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
20641 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20642 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14]
20643 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20644 ; BITALG-NEXT: retq
20645 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
20646 %3 = icmp ult <2 x i64> %2, <i64 14, i64 14>
20647 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_14_v2i64 - population count of each i64 lane of a <2 x i64> vector,
; followed by an unsigned greater-than compare against 14 in both lanes; the
; <2 x i1> compare result is sign-extended to <2 x i64>.
; The check lines below are the expected llc output for each FileCheck prefix
; used by the RUN lines at the top of the file. They are autogenerated by
; utils/update_llc_test_checks.py - regenerate them instead of editing by hand.
20651 define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) {
20652 ; SSE2-LABEL: ugt_14_v2i64:
20654 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20655 ; SSE2-NEXT: psrlw $1, %xmm1
20656 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20657 ; SSE2-NEXT: psubb %xmm1, %xmm0
20658 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20659 ; SSE2-NEXT: movdqa %xmm0, %xmm2
20660 ; SSE2-NEXT: pand %xmm1, %xmm2
20661 ; SSE2-NEXT: psrlw $2, %xmm0
20662 ; SSE2-NEXT: pand %xmm1, %xmm0
20663 ; SSE2-NEXT: paddb %xmm2, %xmm0
20664 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20665 ; SSE2-NEXT: psrlw $4, %xmm1
20666 ; SSE2-NEXT: paddb %xmm0, %xmm1
20667 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20668 ; SSE2-NEXT: pxor %xmm0, %xmm0
20669 ; SSE2-NEXT: psadbw %xmm1, %xmm0
20670 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
20671 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20674 ; SSE3-LABEL: ugt_14_v2i64:
20676 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20677 ; SSE3-NEXT: psrlw $1, %xmm1
20678 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20679 ; SSE3-NEXT: psubb %xmm1, %xmm0
20680 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20681 ; SSE3-NEXT: movdqa %xmm0, %xmm2
20682 ; SSE3-NEXT: pand %xmm1, %xmm2
20683 ; SSE3-NEXT: psrlw $2, %xmm0
20684 ; SSE3-NEXT: pand %xmm1, %xmm0
20685 ; SSE3-NEXT: paddb %xmm2, %xmm0
20686 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20687 ; SSE3-NEXT: psrlw $4, %xmm1
20688 ; SSE3-NEXT: paddb %xmm0, %xmm1
20689 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20690 ; SSE3-NEXT: pxor %xmm0, %xmm0
20691 ; SSE3-NEXT: psadbw %xmm1, %xmm0
20692 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
20693 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20696 ; SSSE3-LABEL: ugt_14_v2i64:
20698 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20699 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
20700 ; SSSE3-NEXT: pand %xmm1, %xmm2
20701 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20702 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
20703 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
20704 ; SSSE3-NEXT: psrlw $4, %xmm0
20705 ; SSSE3-NEXT: pand %xmm1, %xmm0
20706 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
20707 ; SSSE3-NEXT: paddb %xmm4, %xmm3
20708 ; SSSE3-NEXT: pxor %xmm0, %xmm0
20709 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
20710 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
20711 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20714 ; SSE41-LABEL: ugt_14_v2i64:
20716 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20717 ; SSE41-NEXT: movdqa %xmm0, %xmm2
20718 ; SSE41-NEXT: pand %xmm1, %xmm2
20719 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20720 ; SSE41-NEXT: movdqa %xmm3, %xmm4
20721 ; SSE41-NEXT: pshufb %xmm2, %xmm4
20722 ; SSE41-NEXT: psrlw $4, %xmm0
20723 ; SSE41-NEXT: pand %xmm1, %xmm0
20724 ; SSE41-NEXT: pshufb %xmm0, %xmm3
20725 ; SSE41-NEXT: paddb %xmm4, %xmm3
20726 ; SSE41-NEXT: pxor %xmm0, %xmm0
20727 ; SSE41-NEXT: psadbw %xmm3, %xmm0
20728 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
20729 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20732 ; AVX1-LABEL: ugt_14_v2i64:
20734 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20735 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
20736 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20737 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20738 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
20739 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
20740 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20741 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20742 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
20743 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20744 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20747 ; AVX2-LABEL: ugt_14_v2i64:
20749 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20750 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
20751 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20752 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20753 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
20754 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
20755 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20756 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20757 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
20758 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20759 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20762 ; AVX512VPOPCNTDQ-LABEL: ugt_14_v2i64:
20763 ; AVX512VPOPCNTDQ: # %bb.0:
20764 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20765 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
20766 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20767 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
20768 ; AVX512VPOPCNTDQ-NEXT: retq
20770 ; AVX512VPOPCNTDQVL-LABEL: ugt_14_v2i64:
20771 ; AVX512VPOPCNTDQVL: # %bb.0:
20772 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
20773 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14]
20774 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
20775 ; AVX512VPOPCNTDQVL-NEXT: retq
20777 ; BITALG_NOVLX-LABEL: ugt_14_v2i64:
20778 ; BITALG_NOVLX: # %bb.0:
20779 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20780 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
20781 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
20782 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20783 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
20784 ; BITALG_NOVLX-NEXT: vzeroupper
20785 ; BITALG_NOVLX-NEXT: retq
20787 ; BITALG-LABEL: ugt_14_v2i64:
20789 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
20790 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
20791 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20792 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [14,14]
20793 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
20794 ; BITALG-NEXT: retq
; IR under test - ctpop on each i64 lane, icmp ugt against 14 in both lanes,
; then sext of the <2 x i1> result to <2 x i64>.
20795 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
20796 %3 = icmp ugt <2 x i64> %2, <i64 14, i64 14>
20797 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_15_v2i64 - population count of each i64 lane of a <2 x i64> vector,
; followed by an unsigned less-than compare against 15 in both lanes; the
; <2 x i1> compare result is sign-extended to <2 x i64>.
; The check lines below are the expected llc output for each FileCheck prefix
; used by the RUN lines at the top of the file. They are autogenerated by
; utils/update_llc_test_checks.py - regenerate them instead of editing by hand.
20801 define <2 x i64> @ult_15_v2i64(<2 x i64> %0) {
20802 ; SSE2-LABEL: ult_15_v2i64:
20804 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20805 ; SSE2-NEXT: psrlw $1, %xmm1
20806 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20807 ; SSE2-NEXT: psubb %xmm1, %xmm0
20808 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20809 ; SSE2-NEXT: movdqa %xmm0, %xmm2
20810 ; SSE2-NEXT: pand %xmm1, %xmm2
20811 ; SSE2-NEXT: psrlw $2, %xmm0
20812 ; SSE2-NEXT: pand %xmm1, %xmm0
20813 ; SSE2-NEXT: paddb %xmm2, %xmm0
20814 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20815 ; SSE2-NEXT: psrlw $4, %xmm1
20816 ; SSE2-NEXT: paddb %xmm0, %xmm1
20817 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20818 ; SSE2-NEXT: pxor %xmm0, %xmm0
20819 ; SSE2-NEXT: psadbw %xmm1, %xmm0
20820 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20821 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15]
20822 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
20825 ; SSE3-LABEL: ult_15_v2i64:
20827 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20828 ; SSE3-NEXT: psrlw $1, %xmm1
20829 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20830 ; SSE3-NEXT: psubb %xmm1, %xmm0
20831 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20832 ; SSE3-NEXT: movdqa %xmm0, %xmm2
20833 ; SSE3-NEXT: pand %xmm1, %xmm2
20834 ; SSE3-NEXT: psrlw $2, %xmm0
20835 ; SSE3-NEXT: pand %xmm1, %xmm0
20836 ; SSE3-NEXT: paddb %xmm2, %xmm0
20837 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20838 ; SSE3-NEXT: psrlw $4, %xmm1
20839 ; SSE3-NEXT: paddb %xmm0, %xmm1
20840 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20841 ; SSE3-NEXT: pxor %xmm0, %xmm0
20842 ; SSE3-NEXT: psadbw %xmm1, %xmm0
20843 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20844 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15]
20845 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
20848 ; SSSE3-LABEL: ult_15_v2i64:
20850 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20851 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
20852 ; SSSE3-NEXT: pand %xmm1, %xmm2
20853 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20854 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
20855 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
20856 ; SSSE3-NEXT: psrlw $4, %xmm0
20857 ; SSSE3-NEXT: pand %xmm1, %xmm0
20858 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
20859 ; SSSE3-NEXT: paddb %xmm4, %xmm3
20860 ; SSSE3-NEXT: pxor %xmm0, %xmm0
20861 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
20862 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20863 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15]
20864 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
20867 ; SSE41-LABEL: ult_15_v2i64:
20869 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20870 ; SSE41-NEXT: movdqa %xmm0, %xmm2
20871 ; SSE41-NEXT: pand %xmm1, %xmm2
20872 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20873 ; SSE41-NEXT: movdqa %xmm3, %xmm4
20874 ; SSE41-NEXT: pshufb %xmm2, %xmm4
20875 ; SSE41-NEXT: psrlw $4, %xmm0
20876 ; SSE41-NEXT: pand %xmm1, %xmm0
20877 ; SSE41-NEXT: pshufb %xmm0, %xmm3
20878 ; SSE41-NEXT: paddb %xmm4, %xmm3
20879 ; SSE41-NEXT: pxor %xmm0, %xmm0
20880 ; SSE41-NEXT: psadbw %xmm3, %xmm0
20881 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
20882 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [15,15,15,15]
20883 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
20886 ; AVX1-LABEL: ult_15_v2i64:
20888 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20889 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
20890 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20891 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20892 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
20893 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
20894 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20895 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20896 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
20897 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20898 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [15,15]
20899 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20902 ; AVX2-LABEL: ult_15_v2i64:
20904 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
20905 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
20906 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
20907 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
20908 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
20909 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
20910 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
20911 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
20912 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
20913 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20914 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [15,15]
20915 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20918 ; AVX512VPOPCNTDQ-LABEL: ult_15_v2i64:
20919 ; AVX512VPOPCNTDQ: # %bb.0:
20920 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20921 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
20922 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [15,15]
20923 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20924 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
20925 ; AVX512VPOPCNTDQ-NEXT: retq
20927 ; AVX512VPOPCNTDQVL-LABEL: ult_15_v2i64:
20928 ; AVX512VPOPCNTDQVL: # %bb.0:
20929 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
20930 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15]
20931 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20932 ; AVX512VPOPCNTDQVL-NEXT: retq
20934 ; BITALG_NOVLX-LABEL: ult_15_v2i64:
20935 ; BITALG_NOVLX: # %bb.0:
20936 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
20937 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
20938 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
20939 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20940 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [15,15]
20941 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20942 ; BITALG_NOVLX-NEXT: vzeroupper
20943 ; BITALG_NOVLX-NEXT: retq
20945 ; BITALG-LABEL: ult_15_v2i64:
20947 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
20948 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
20949 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
20950 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15]
20951 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
20952 ; BITALG-NEXT: retq
; IR under test - ctpop on each i64 lane, icmp ult against 15 in both lanes,
; then sext of the <2 x i1> result to <2 x i64>.
20953 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
20954 %3 = icmp ult <2 x i64> %2, <i64 15, i64 15>
20955 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_15_v2i64 - population count of each i64 lane of a <2 x i64> vector,
; followed by an unsigned greater-than compare against 15 in both lanes; the
; <2 x i1> compare result is sign-extended to <2 x i64>.
; The check lines below are the expected llc output for each FileCheck prefix
; used by the RUN lines at the top of the file. They are autogenerated by
; utils/update_llc_test_checks.py - regenerate them instead of editing by hand.
20959 define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) {
20960 ; SSE2-LABEL: ugt_15_v2i64:
20962 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20963 ; SSE2-NEXT: psrlw $1, %xmm1
20964 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20965 ; SSE2-NEXT: psubb %xmm1, %xmm0
20966 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20967 ; SSE2-NEXT: movdqa %xmm0, %xmm2
20968 ; SSE2-NEXT: pand %xmm1, %xmm2
20969 ; SSE2-NEXT: psrlw $2, %xmm0
20970 ; SSE2-NEXT: pand %xmm1, %xmm0
20971 ; SSE2-NEXT: paddb %xmm2, %xmm0
20972 ; SSE2-NEXT: movdqa %xmm0, %xmm1
20973 ; SSE2-NEXT: psrlw $4, %xmm1
20974 ; SSE2-NEXT: paddb %xmm0, %xmm1
20975 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20976 ; SSE2-NEXT: pxor %xmm0, %xmm0
20977 ; SSE2-NEXT: psadbw %xmm1, %xmm0
20978 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
20979 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20982 ; SSE3-LABEL: ugt_15_v2i64:
20984 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20985 ; SSE3-NEXT: psrlw $1, %xmm1
20986 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20987 ; SSE3-NEXT: psubb %xmm1, %xmm0
20988 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
20989 ; SSE3-NEXT: movdqa %xmm0, %xmm2
20990 ; SSE3-NEXT: pand %xmm1, %xmm2
20991 ; SSE3-NEXT: psrlw $2, %xmm0
20992 ; SSE3-NEXT: pand %xmm1, %xmm0
20993 ; SSE3-NEXT: paddb %xmm2, %xmm0
20994 ; SSE3-NEXT: movdqa %xmm0, %xmm1
20995 ; SSE3-NEXT: psrlw $4, %xmm1
20996 ; SSE3-NEXT: paddb %xmm0, %xmm1
20997 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
20998 ; SSE3-NEXT: pxor %xmm0, %xmm0
20999 ; SSE3-NEXT: psadbw %xmm1, %xmm0
21000 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
21001 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21004 ; SSSE3-LABEL: ugt_15_v2i64:
21006 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21007 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
21008 ; SSSE3-NEXT: pand %xmm1, %xmm2
21009 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21010 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
21011 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
21012 ; SSSE3-NEXT: psrlw $4, %xmm0
21013 ; SSSE3-NEXT: pand %xmm1, %xmm0
21014 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
21015 ; SSSE3-NEXT: paddb %xmm4, %xmm3
21016 ; SSSE3-NEXT: pxor %xmm0, %xmm0
21017 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
21018 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
21019 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21022 ; SSE41-LABEL: ugt_15_v2i64:
21024 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21025 ; SSE41-NEXT: movdqa %xmm0, %xmm2
21026 ; SSE41-NEXT: pand %xmm1, %xmm2
21027 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21028 ; SSE41-NEXT: movdqa %xmm3, %xmm4
21029 ; SSE41-NEXT: pshufb %xmm2, %xmm4
21030 ; SSE41-NEXT: psrlw $4, %xmm0
21031 ; SSE41-NEXT: pand %xmm1, %xmm0
21032 ; SSE41-NEXT: pshufb %xmm0, %xmm3
21033 ; SSE41-NEXT: paddb %xmm4, %xmm3
21034 ; SSE41-NEXT: pxor %xmm0, %xmm0
21035 ; SSE41-NEXT: psadbw %xmm3, %xmm0
21036 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
21037 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21040 ; AVX1-LABEL: ugt_15_v2i64:
21042 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21043 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
21044 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21045 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21046 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
21047 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
21048 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21049 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21050 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
21051 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21052 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21055 ; AVX2-LABEL: ugt_15_v2i64:
21057 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21058 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
21059 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21060 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21061 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
21062 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
21063 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21064 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21065 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
21066 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21067 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21070 ; AVX512VPOPCNTDQ-LABEL: ugt_15_v2i64:
21071 ; AVX512VPOPCNTDQ: # %bb.0:
21072 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21073 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
21074 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21075 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
21076 ; AVX512VPOPCNTDQ-NEXT: retq
21078 ; AVX512VPOPCNTDQVL-LABEL: ugt_15_v2i64:
21079 ; AVX512VPOPCNTDQVL: # %bb.0:
21080 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
21081 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15]
21082 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
21083 ; AVX512VPOPCNTDQVL-NEXT: retq
21085 ; BITALG_NOVLX-LABEL: ugt_15_v2i64:
21086 ; BITALG_NOVLX: # %bb.0:
21087 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21088 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
21089 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
21090 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21091 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21092 ; BITALG_NOVLX-NEXT: vzeroupper
21093 ; BITALG_NOVLX-NEXT: retq
21095 ; BITALG-LABEL: ugt_15_v2i64:
21097 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
21098 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
21099 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21100 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [15,15]
21101 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
21102 ; BITALG-NEXT: retq
; IR under test - ctpop on each i64 lane, icmp ugt against 15 in both lanes,
; then sext of the <2 x i1> result to <2 x i64>.
21103 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
21104 %3 = icmp ugt <2 x i64> %2, <i64 15, i64 15>
21105 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_16_v2i64 - population count of each i64 lane of a <2 x i64> vector,
; followed by an unsigned less-than compare against 16 in both lanes; the
; <2 x i1> compare result is sign-extended to <2 x i64>.
; The check lines below are the expected llc output for each FileCheck prefix
; used by the RUN lines at the top of the file. They are autogenerated by
; utils/update_llc_test_checks.py - regenerate them instead of editing by hand.
21109 define <2 x i64> @ult_16_v2i64(<2 x i64> %0) {
21110 ; SSE2-LABEL: ult_16_v2i64:
21112 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21113 ; SSE2-NEXT: psrlw $1, %xmm1
21114 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21115 ; SSE2-NEXT: psubb %xmm1, %xmm0
21116 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21117 ; SSE2-NEXT: movdqa %xmm0, %xmm2
21118 ; SSE2-NEXT: pand %xmm1, %xmm2
21119 ; SSE2-NEXT: psrlw $2, %xmm0
21120 ; SSE2-NEXT: pand %xmm1, %xmm0
21121 ; SSE2-NEXT: paddb %xmm2, %xmm0
21122 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21123 ; SSE2-NEXT: psrlw $4, %xmm1
21124 ; SSE2-NEXT: paddb %xmm0, %xmm1
21125 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21126 ; SSE2-NEXT: pxor %xmm0, %xmm0
21127 ; SSE2-NEXT: psadbw %xmm1, %xmm0
21128 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21129 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16]
21130 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
21133 ; SSE3-LABEL: ult_16_v2i64:
21135 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21136 ; SSE3-NEXT: psrlw $1, %xmm1
21137 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21138 ; SSE3-NEXT: psubb %xmm1, %xmm0
21139 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21140 ; SSE3-NEXT: movdqa %xmm0, %xmm2
21141 ; SSE3-NEXT: pand %xmm1, %xmm2
21142 ; SSE3-NEXT: psrlw $2, %xmm0
21143 ; SSE3-NEXT: pand %xmm1, %xmm0
21144 ; SSE3-NEXT: paddb %xmm2, %xmm0
21145 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21146 ; SSE3-NEXT: psrlw $4, %xmm1
21147 ; SSE3-NEXT: paddb %xmm0, %xmm1
21148 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21149 ; SSE3-NEXT: pxor %xmm0, %xmm0
21150 ; SSE3-NEXT: psadbw %xmm1, %xmm0
21151 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21152 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16]
21153 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
21156 ; SSSE3-LABEL: ult_16_v2i64:
21158 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21159 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
21160 ; SSSE3-NEXT: pand %xmm1, %xmm2
21161 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21162 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
21163 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
21164 ; SSSE3-NEXT: psrlw $4, %xmm0
21165 ; SSSE3-NEXT: pand %xmm1, %xmm0
21166 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
21167 ; SSSE3-NEXT: paddb %xmm4, %xmm3
21168 ; SSSE3-NEXT: pxor %xmm0, %xmm0
21169 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
21170 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21171 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [16,16,16,16]
21172 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
21175 ; SSE41-LABEL: ult_16_v2i64:
21177 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21178 ; SSE41-NEXT: movdqa %xmm0, %xmm2
21179 ; SSE41-NEXT: pand %xmm1, %xmm2
21180 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21181 ; SSE41-NEXT: movdqa %xmm3, %xmm4
21182 ; SSE41-NEXT: pshufb %xmm2, %xmm4
21183 ; SSE41-NEXT: psrlw $4, %xmm0
21184 ; SSE41-NEXT: pand %xmm1, %xmm0
21185 ; SSE41-NEXT: pshufb %xmm0, %xmm3
21186 ; SSE41-NEXT: paddb %xmm4, %xmm3
21187 ; SSE41-NEXT: pxor %xmm0, %xmm0
21188 ; SSE41-NEXT: psadbw %xmm3, %xmm0
21189 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21190 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [16,16,16,16]
21191 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
21194 ; AVX1-LABEL: ult_16_v2i64:
21196 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21197 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
21198 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21199 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21200 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
21201 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
21202 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21203 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21204 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
21205 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21206 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [16,16]
21207 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21210 ; AVX2-LABEL: ult_16_v2i64:
21212 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21213 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
21214 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21215 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21216 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
21217 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
21218 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21219 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21220 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
21221 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21222 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [16,16]
21223 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21226 ; AVX512VPOPCNTDQ-LABEL: ult_16_v2i64:
21227 ; AVX512VPOPCNTDQ: # %bb.0:
21228 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21229 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
21230 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [16,16]
21231 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21232 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
21233 ; AVX512VPOPCNTDQ-NEXT: retq
21235 ; AVX512VPOPCNTDQVL-LABEL: ult_16_v2i64:
21236 ; AVX512VPOPCNTDQVL: # %bb.0:
21237 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
21238 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16]
21239 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21240 ; AVX512VPOPCNTDQVL-NEXT: retq
21242 ; BITALG_NOVLX-LABEL: ult_16_v2i64:
21243 ; BITALG_NOVLX: # %bb.0:
21244 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21245 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
21246 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
21247 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21248 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [16,16]
21249 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21250 ; BITALG_NOVLX-NEXT: vzeroupper
21251 ; BITALG_NOVLX-NEXT: retq
21253 ; BITALG-LABEL: ult_16_v2i64:
21255 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
21256 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
21257 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21258 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16]
21259 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21260 ; BITALG-NEXT: retq
; IR under test - ctpop on each i64 lane, icmp ult against 16 in both lanes,
; then sext of the <2 x i1> result to <2 x i64>.
21261 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
21262 %3 = icmp ult <2 x i64> %2, <i64 16, i64 16>
21263 %4 = sext <2 x i1> %3 to <2 x i64>
21267 define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) {
21268 ; SSE2-LABEL: ugt_16_v2i64:
21270 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21271 ; SSE2-NEXT: psrlw $1, %xmm1
21272 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21273 ; SSE2-NEXT: psubb %xmm1, %xmm0
21274 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21275 ; SSE2-NEXT: movdqa %xmm0, %xmm2
21276 ; SSE2-NEXT: pand %xmm1, %xmm2
21277 ; SSE2-NEXT: psrlw $2, %xmm0
21278 ; SSE2-NEXT: pand %xmm1, %xmm0
21279 ; SSE2-NEXT: paddb %xmm2, %xmm0
21280 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21281 ; SSE2-NEXT: psrlw $4, %xmm1
21282 ; SSE2-NEXT: paddb %xmm0, %xmm1
21283 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21284 ; SSE2-NEXT: pxor %xmm0, %xmm0
21285 ; SSE2-NEXT: psadbw %xmm1, %xmm0
21286 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
21287 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21290 ; SSE3-LABEL: ugt_16_v2i64:
21292 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21293 ; SSE3-NEXT: psrlw $1, %xmm1
21294 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21295 ; SSE3-NEXT: psubb %xmm1, %xmm0
21296 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21297 ; SSE3-NEXT: movdqa %xmm0, %xmm2
21298 ; SSE3-NEXT: pand %xmm1, %xmm2
21299 ; SSE3-NEXT: psrlw $2, %xmm0
21300 ; SSE3-NEXT: pand %xmm1, %xmm0
21301 ; SSE3-NEXT: paddb %xmm2, %xmm0
21302 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21303 ; SSE3-NEXT: psrlw $4, %xmm1
21304 ; SSE3-NEXT: paddb %xmm0, %xmm1
21305 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21306 ; SSE3-NEXT: pxor %xmm0, %xmm0
21307 ; SSE3-NEXT: psadbw %xmm1, %xmm0
21308 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
21309 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21312 ; SSSE3-LABEL: ugt_16_v2i64:
21314 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21315 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
21316 ; SSSE3-NEXT: pand %xmm1, %xmm2
21317 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21318 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
21319 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
21320 ; SSSE3-NEXT: psrlw $4, %xmm0
21321 ; SSSE3-NEXT: pand %xmm1, %xmm0
21322 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
21323 ; SSSE3-NEXT: paddb %xmm4, %xmm3
21324 ; SSSE3-NEXT: pxor %xmm0, %xmm0
21325 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
21326 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
21327 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21330 ; SSE41-LABEL: ugt_16_v2i64:
21332 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21333 ; SSE41-NEXT: movdqa %xmm0, %xmm2
21334 ; SSE41-NEXT: pand %xmm1, %xmm2
21335 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21336 ; SSE41-NEXT: movdqa %xmm3, %xmm4
21337 ; SSE41-NEXT: pshufb %xmm2, %xmm4
21338 ; SSE41-NEXT: psrlw $4, %xmm0
21339 ; SSE41-NEXT: pand %xmm1, %xmm0
21340 ; SSE41-NEXT: pshufb %xmm0, %xmm3
21341 ; SSE41-NEXT: paddb %xmm4, %xmm3
21342 ; SSE41-NEXT: pxor %xmm0, %xmm0
21343 ; SSE41-NEXT: psadbw %xmm3, %xmm0
21344 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
21345 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21348 ; AVX1-LABEL: ugt_16_v2i64:
21350 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21351 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
21352 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21353 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21354 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
21355 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
21356 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21357 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21358 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
21359 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21360 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21363 ; AVX2-LABEL: ugt_16_v2i64:
21365 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21366 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
21367 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21368 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21369 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
21370 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
21371 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21372 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21373 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
21374 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21375 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21378 ; AVX512VPOPCNTDQ-LABEL: ugt_16_v2i64:
21379 ; AVX512VPOPCNTDQ: # %bb.0:
21380 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21381 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
21382 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21383 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
21384 ; AVX512VPOPCNTDQ-NEXT: retq
21386 ; AVX512VPOPCNTDQVL-LABEL: ugt_16_v2i64:
21387 ; AVX512VPOPCNTDQVL: # %bb.0:
21388 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
21389 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16]
21390 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
21391 ; AVX512VPOPCNTDQVL-NEXT: retq
21393 ; BITALG_NOVLX-LABEL: ugt_16_v2i64:
21394 ; BITALG_NOVLX: # %bb.0:
21395 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21396 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
21397 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
21398 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21399 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21400 ; BITALG_NOVLX-NEXT: vzeroupper
21401 ; BITALG_NOVLX-NEXT: retq
21403 ; BITALG-LABEL: ugt_16_v2i64:
21405 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
21406 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
21407 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21408 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [16,16]
21409 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
21410 ; BITALG-NEXT: retq
21411 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
21412 %3 = icmp ugt <2 x i64> %2, <i64 16, i64 16>
21413 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_17_v2i64 - for each 64-bit lane, checks ctpop(%0) < 17 (unsigned)
; and sign-extends the i1 result into %4 (all-ones when true, zero otherwise).
; Expected lowering recorded by the assertions below -
;   SSE2 and SSE3 prefixes - shift/mask popcount, psadbw against zero, then a
;     32-bit pcmpgtd of a splat-17 register against the shuffled counts.
;   SSSE3 through AVX2 prefixes - pshufb nibble-table popcount plus psadbw;
;     SSE41 and later materialise the splat of 17 via pmovsx.
;   VPOPCNTDQ prefixes - a direct vpopcntq (widened to zmm without VL).
;   BITALG prefixes - vpopcntb followed by vpsadbw against zero.
; The constant is the first compare operand in every variant, i.e. the
; comparison is emitted as 17 > count.
21417 define <2 x i64> @ult_17_v2i64(<2 x i64> %0) {
21418 ; SSE2-LABEL: ult_17_v2i64:
21420 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21421 ; SSE2-NEXT: psrlw $1, %xmm1
21422 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21423 ; SSE2-NEXT: psubb %xmm1, %xmm0
21424 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21425 ; SSE2-NEXT: movdqa %xmm0, %xmm2
21426 ; SSE2-NEXT: pand %xmm1, %xmm2
21427 ; SSE2-NEXT: psrlw $2, %xmm0
21428 ; SSE2-NEXT: pand %xmm1, %xmm0
21429 ; SSE2-NEXT: paddb %xmm2, %xmm0
21430 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21431 ; SSE2-NEXT: psrlw $4, %xmm1
21432 ; SSE2-NEXT: paddb %xmm0, %xmm1
21433 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21434 ; SSE2-NEXT: pxor %xmm0, %xmm0
21435 ; SSE2-NEXT: psadbw %xmm1, %xmm0
21436 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21437 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17]
21438 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
21441 ; SSE3-LABEL: ult_17_v2i64:
21443 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21444 ; SSE3-NEXT: psrlw $1, %xmm1
21445 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21446 ; SSE3-NEXT: psubb %xmm1, %xmm0
21447 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21448 ; SSE3-NEXT: movdqa %xmm0, %xmm2
21449 ; SSE3-NEXT: pand %xmm1, %xmm2
21450 ; SSE3-NEXT: psrlw $2, %xmm0
21451 ; SSE3-NEXT: pand %xmm1, %xmm0
21452 ; SSE3-NEXT: paddb %xmm2, %xmm0
21453 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21454 ; SSE3-NEXT: psrlw $4, %xmm1
21455 ; SSE3-NEXT: paddb %xmm0, %xmm1
21456 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21457 ; SSE3-NEXT: pxor %xmm0, %xmm0
21458 ; SSE3-NEXT: psadbw %xmm1, %xmm0
21459 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21460 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17]
21461 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
21464 ; SSSE3-LABEL: ult_17_v2i64:
21466 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21467 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
21468 ; SSSE3-NEXT: pand %xmm1, %xmm2
21469 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21470 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
21471 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
21472 ; SSSE3-NEXT: psrlw $4, %xmm0
21473 ; SSSE3-NEXT: pand %xmm1, %xmm0
21474 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
21475 ; SSSE3-NEXT: paddb %xmm4, %xmm3
21476 ; SSSE3-NEXT: pxor %xmm0, %xmm0
21477 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
21478 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21479 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [17,17,17,17]
21480 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
21483 ; SSE41-LABEL: ult_17_v2i64:
21485 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21486 ; SSE41-NEXT: movdqa %xmm0, %xmm2
21487 ; SSE41-NEXT: pand %xmm1, %xmm2
21488 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21489 ; SSE41-NEXT: movdqa %xmm3, %xmm4
21490 ; SSE41-NEXT: pshufb %xmm2, %xmm4
21491 ; SSE41-NEXT: psrlw $4, %xmm0
21492 ; SSE41-NEXT: pand %xmm1, %xmm0
21493 ; SSE41-NEXT: pshufb %xmm0, %xmm3
21494 ; SSE41-NEXT: paddb %xmm4, %xmm3
21495 ; SSE41-NEXT: pxor %xmm0, %xmm0
21496 ; SSE41-NEXT: psadbw %xmm3, %xmm0
21497 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21498 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [17,17,17,17]
21499 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
21502 ; AVX1-LABEL: ult_17_v2i64:
21504 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21505 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
21506 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21507 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21508 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
21509 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
21510 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21511 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21512 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
21513 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21514 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [17,17]
21515 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21518 ; AVX2-LABEL: ult_17_v2i64:
21520 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21521 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
21522 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21523 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21524 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
21525 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
21526 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21527 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21528 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
21529 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21530 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [17,17]
21531 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21534 ; AVX512VPOPCNTDQ-LABEL: ult_17_v2i64:
21535 ; AVX512VPOPCNTDQ: # %bb.0:
21536 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21537 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
21538 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [17,17]
21539 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21540 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
21541 ; AVX512VPOPCNTDQ-NEXT: retq
21543 ; AVX512VPOPCNTDQVL-LABEL: ult_17_v2i64:
21544 ; AVX512VPOPCNTDQVL: # %bb.0:
21545 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
21546 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17]
21547 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21548 ; AVX512VPOPCNTDQVL-NEXT: retq
21550 ; BITALG_NOVLX-LABEL: ult_17_v2i64:
21551 ; BITALG_NOVLX: # %bb.0:
21552 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21553 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
21554 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
21555 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21556 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [17,17]
21557 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21558 ; BITALG_NOVLX-NEXT: vzeroupper
21559 ; BITALG_NOVLX-NEXT: retq
21561 ; BITALG-LABEL: ult_17_v2i64:
21563 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
21564 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
21565 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21566 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17]
21567 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21568 ; BITALG-NEXT: retq
; IR under test - popcount each lane, unsigned-less-than 17, widen i1 to i64.
21569 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
21570 %3 = icmp ult <2 x i64> %2, <i64 17, i64 17>
21571 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_17_v2i64 - for each 64-bit lane, checks ctpop(%0) > 17 (unsigned)
; and sign-extends the i1 result into %4 (all-ones when true, zero otherwise).
; Expected lowering recorded by the assertions below -
;   SSE2 and SSE3 prefixes - shift/mask popcount, psadbw against zero, then a
;     32-bit pcmpgtd of the shuffled counts against a constant-pool operand.
;   SSSE3 through AVX2 prefixes - pshufb nibble-table popcount plus psadbw,
;     compared against a constant-pool operand.
;   VPOPCNTDQ prefixes - a direct vpopcntq (widened to zmm without VL).
;   BITALG prefixes - vpopcntb followed by vpsadbw against zero.
; The count is the first compare operand in every variant, i.e. the
; comparison is emitted as count > constant; only the VL-enabled runs show
; the constant explicitly as a [17,17] broadcast.
21575 define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) {
21576 ; SSE2-LABEL: ugt_17_v2i64:
21578 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21579 ; SSE2-NEXT: psrlw $1, %xmm1
21580 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21581 ; SSE2-NEXT: psubb %xmm1, %xmm0
21582 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21583 ; SSE2-NEXT: movdqa %xmm0, %xmm2
21584 ; SSE2-NEXT: pand %xmm1, %xmm2
21585 ; SSE2-NEXT: psrlw $2, %xmm0
21586 ; SSE2-NEXT: pand %xmm1, %xmm0
21587 ; SSE2-NEXT: paddb %xmm2, %xmm0
21588 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21589 ; SSE2-NEXT: psrlw $4, %xmm1
21590 ; SSE2-NEXT: paddb %xmm0, %xmm1
21591 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21592 ; SSE2-NEXT: pxor %xmm0, %xmm0
21593 ; SSE2-NEXT: psadbw %xmm1, %xmm0
21594 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
21595 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21598 ; SSE3-LABEL: ugt_17_v2i64:
21600 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21601 ; SSE3-NEXT: psrlw $1, %xmm1
21602 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21603 ; SSE3-NEXT: psubb %xmm1, %xmm0
21604 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21605 ; SSE3-NEXT: movdqa %xmm0, %xmm2
21606 ; SSE3-NEXT: pand %xmm1, %xmm2
21607 ; SSE3-NEXT: psrlw $2, %xmm0
21608 ; SSE3-NEXT: pand %xmm1, %xmm0
21609 ; SSE3-NEXT: paddb %xmm2, %xmm0
21610 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21611 ; SSE3-NEXT: psrlw $4, %xmm1
21612 ; SSE3-NEXT: paddb %xmm0, %xmm1
21613 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21614 ; SSE3-NEXT: pxor %xmm0, %xmm0
21615 ; SSE3-NEXT: psadbw %xmm1, %xmm0
21616 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
21617 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21620 ; SSSE3-LABEL: ugt_17_v2i64:
21622 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21623 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
21624 ; SSSE3-NEXT: pand %xmm1, %xmm2
21625 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21626 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
21627 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
21628 ; SSSE3-NEXT: psrlw $4, %xmm0
21629 ; SSSE3-NEXT: pand %xmm1, %xmm0
21630 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
21631 ; SSSE3-NEXT: paddb %xmm4, %xmm3
21632 ; SSSE3-NEXT: pxor %xmm0, %xmm0
21633 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
21634 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
21635 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21638 ; SSE41-LABEL: ugt_17_v2i64:
21640 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21641 ; SSE41-NEXT: movdqa %xmm0, %xmm2
21642 ; SSE41-NEXT: pand %xmm1, %xmm2
21643 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21644 ; SSE41-NEXT: movdqa %xmm3, %xmm4
21645 ; SSE41-NEXT: pshufb %xmm2, %xmm4
21646 ; SSE41-NEXT: psrlw $4, %xmm0
21647 ; SSE41-NEXT: pand %xmm1, %xmm0
21648 ; SSE41-NEXT: pshufb %xmm0, %xmm3
21649 ; SSE41-NEXT: paddb %xmm4, %xmm3
21650 ; SSE41-NEXT: pxor %xmm0, %xmm0
21651 ; SSE41-NEXT: psadbw %xmm3, %xmm0
21652 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
21653 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21656 ; AVX1-LABEL: ugt_17_v2i64:
21658 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21659 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
21660 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21661 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21662 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
21663 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
21664 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21665 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21666 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
21667 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21668 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21671 ; AVX2-LABEL: ugt_17_v2i64:
21673 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21674 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
21675 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21676 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21677 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
21678 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
21679 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21680 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21681 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
21682 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21683 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21686 ; AVX512VPOPCNTDQ-LABEL: ugt_17_v2i64:
21687 ; AVX512VPOPCNTDQ: # %bb.0:
21688 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21689 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
21690 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21691 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
21692 ; AVX512VPOPCNTDQ-NEXT: retq
21694 ; AVX512VPOPCNTDQVL-LABEL: ugt_17_v2i64:
21695 ; AVX512VPOPCNTDQVL: # %bb.0:
21696 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
21697 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17]
21698 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
21699 ; AVX512VPOPCNTDQVL-NEXT: retq
21701 ; BITALG_NOVLX-LABEL: ugt_17_v2i64:
21702 ; BITALG_NOVLX: # %bb.0:
21703 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21704 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
21705 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
21706 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21707 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21708 ; BITALG_NOVLX-NEXT: vzeroupper
21709 ; BITALG_NOVLX-NEXT: retq
21711 ; BITALG-LABEL: ugt_17_v2i64:
21713 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
21714 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
21715 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21716 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [17,17]
21717 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
21718 ; BITALG-NEXT: retq
; IR under test - popcount each lane, unsigned-greater-than 17, widen i1 to i64.
21719 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
21720 %3 = icmp ugt <2 x i64> %2, <i64 17, i64 17>
21721 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_18_v2i64 - for each 64-bit lane, checks ctpop(%0) < 18 (unsigned)
; and sign-extends the i1 result into %4 (all-ones when true, zero otherwise).
; Expected lowering recorded by the assertions below -
;   SSE2 and SSE3 prefixes - shift/mask popcount, psadbw against zero, then a
;     32-bit pcmpgtd of a splat-18 register against the shuffled counts.
;   SSSE3 through AVX2 prefixes - pshufb nibble-table popcount plus psadbw;
;     SSE41 and later materialise the splat of 18 via pmovsx.
;   VPOPCNTDQ prefixes - a direct vpopcntq (widened to zmm without VL).
;   BITALG prefixes - vpopcntb followed by vpsadbw against zero.
; The constant is the first compare operand in every variant, i.e. the
; comparison is emitted as 18 > count.
21725 define <2 x i64> @ult_18_v2i64(<2 x i64> %0) {
21726 ; SSE2-LABEL: ult_18_v2i64:
21728 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21729 ; SSE2-NEXT: psrlw $1, %xmm1
21730 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21731 ; SSE2-NEXT: psubb %xmm1, %xmm0
21732 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21733 ; SSE2-NEXT: movdqa %xmm0, %xmm2
21734 ; SSE2-NEXT: pand %xmm1, %xmm2
21735 ; SSE2-NEXT: psrlw $2, %xmm0
21736 ; SSE2-NEXT: pand %xmm1, %xmm0
21737 ; SSE2-NEXT: paddb %xmm2, %xmm0
21738 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21739 ; SSE2-NEXT: psrlw $4, %xmm1
21740 ; SSE2-NEXT: paddb %xmm0, %xmm1
21741 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21742 ; SSE2-NEXT: pxor %xmm0, %xmm0
21743 ; SSE2-NEXT: psadbw %xmm1, %xmm0
21744 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21745 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18]
21746 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
21749 ; SSE3-LABEL: ult_18_v2i64:
21751 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21752 ; SSE3-NEXT: psrlw $1, %xmm1
21753 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21754 ; SSE3-NEXT: psubb %xmm1, %xmm0
21755 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21756 ; SSE3-NEXT: movdqa %xmm0, %xmm2
21757 ; SSE3-NEXT: pand %xmm1, %xmm2
21758 ; SSE3-NEXT: psrlw $2, %xmm0
21759 ; SSE3-NEXT: pand %xmm1, %xmm0
21760 ; SSE3-NEXT: paddb %xmm2, %xmm0
21761 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21762 ; SSE3-NEXT: psrlw $4, %xmm1
21763 ; SSE3-NEXT: paddb %xmm0, %xmm1
21764 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21765 ; SSE3-NEXT: pxor %xmm0, %xmm0
21766 ; SSE3-NEXT: psadbw %xmm1, %xmm0
21767 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21768 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18]
21769 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
21772 ; SSSE3-LABEL: ult_18_v2i64:
21774 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21775 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
21776 ; SSSE3-NEXT: pand %xmm1, %xmm2
21777 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21778 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
21779 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
21780 ; SSSE3-NEXT: psrlw $4, %xmm0
21781 ; SSSE3-NEXT: pand %xmm1, %xmm0
21782 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
21783 ; SSSE3-NEXT: paddb %xmm4, %xmm3
21784 ; SSSE3-NEXT: pxor %xmm0, %xmm0
21785 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
21786 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21787 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18,18,18,18]
21788 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
21791 ; SSE41-LABEL: ult_18_v2i64:
21793 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21794 ; SSE41-NEXT: movdqa %xmm0, %xmm2
21795 ; SSE41-NEXT: pand %xmm1, %xmm2
21796 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21797 ; SSE41-NEXT: movdqa %xmm3, %xmm4
21798 ; SSE41-NEXT: pshufb %xmm2, %xmm4
21799 ; SSE41-NEXT: psrlw $4, %xmm0
21800 ; SSE41-NEXT: pand %xmm1, %xmm0
21801 ; SSE41-NEXT: pshufb %xmm0, %xmm3
21802 ; SSE41-NEXT: paddb %xmm4, %xmm3
21803 ; SSE41-NEXT: pxor %xmm0, %xmm0
21804 ; SSE41-NEXT: psadbw %xmm3, %xmm0
21805 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
21806 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [18,18,18,18]
21807 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
21810 ; AVX1-LABEL: ult_18_v2i64:
21812 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21813 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
21814 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21815 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21816 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
21817 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
21818 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21819 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21820 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
21821 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21822 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [18,18]
21823 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21826 ; AVX2-LABEL: ult_18_v2i64:
21828 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21829 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
21830 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21831 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21832 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
21833 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
21834 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21835 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21836 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
21837 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21838 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [18,18]
21839 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21842 ; AVX512VPOPCNTDQ-LABEL: ult_18_v2i64:
21843 ; AVX512VPOPCNTDQ: # %bb.0:
21844 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21845 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
21846 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [18,18]
21847 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21848 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
21849 ; AVX512VPOPCNTDQ-NEXT: retq
21851 ; AVX512VPOPCNTDQVL-LABEL: ult_18_v2i64:
21852 ; AVX512VPOPCNTDQVL: # %bb.0:
21853 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
21854 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18]
21855 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21856 ; AVX512VPOPCNTDQVL-NEXT: retq
21858 ; BITALG_NOVLX-LABEL: ult_18_v2i64:
21859 ; BITALG_NOVLX: # %bb.0:
21860 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21861 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
21862 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
21863 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21864 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [18,18]
21865 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21866 ; BITALG_NOVLX-NEXT: vzeroupper
21867 ; BITALG_NOVLX-NEXT: retq
21869 ; BITALG-LABEL: ult_18_v2i64:
21871 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
21872 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
21873 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21874 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18]
21875 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
21876 ; BITALG-NEXT: retq
; IR under test - popcount each lane, unsigned-less-than 18, widen i1 to i64.
21877 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
21878 %3 = icmp ult <2 x i64> %2, <i64 18, i64 18>
21879 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_18_v2i64 - for each 64-bit lane, checks ctpop(%0) > 18 (unsigned)
; and sign-extends the i1 result into %4 (all-ones when true, zero otherwise).
; Expected lowering recorded by the assertions below -
;   SSE2 and SSE3 prefixes - shift/mask popcount, psadbw against zero, then a
;     32-bit pcmpgtd of the shuffled counts against a constant-pool operand.
;   SSSE3 through AVX2 prefixes - pshufb nibble-table popcount plus psadbw,
;     compared against a constant-pool operand.
;   VPOPCNTDQ prefixes - a direct vpopcntq (widened to zmm without VL).
;   BITALG prefixes - vpopcntb followed by vpsadbw against zero.
; The count is the first compare operand in every variant, i.e. the
; comparison is emitted as count > constant; only the VL-enabled runs show
; the constant explicitly as a [18,18] broadcast.
21883 define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) {
21884 ; SSE2-LABEL: ugt_18_v2i64:
21886 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21887 ; SSE2-NEXT: psrlw $1, %xmm1
21888 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21889 ; SSE2-NEXT: psubb %xmm1, %xmm0
21890 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21891 ; SSE2-NEXT: movdqa %xmm0, %xmm2
21892 ; SSE2-NEXT: pand %xmm1, %xmm2
21893 ; SSE2-NEXT: psrlw $2, %xmm0
21894 ; SSE2-NEXT: pand %xmm1, %xmm0
21895 ; SSE2-NEXT: paddb %xmm2, %xmm0
21896 ; SSE2-NEXT: movdqa %xmm0, %xmm1
21897 ; SSE2-NEXT: psrlw $4, %xmm1
21898 ; SSE2-NEXT: paddb %xmm0, %xmm1
21899 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21900 ; SSE2-NEXT: pxor %xmm0, %xmm0
21901 ; SSE2-NEXT: psadbw %xmm1, %xmm0
21902 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
21903 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21906 ; SSE3-LABEL: ugt_18_v2i64:
21908 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21909 ; SSE3-NEXT: psrlw $1, %xmm1
21910 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21911 ; SSE3-NEXT: psubb %xmm1, %xmm0
21912 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
21913 ; SSE3-NEXT: movdqa %xmm0, %xmm2
21914 ; SSE3-NEXT: pand %xmm1, %xmm2
21915 ; SSE3-NEXT: psrlw $2, %xmm0
21916 ; SSE3-NEXT: pand %xmm1, %xmm0
21917 ; SSE3-NEXT: paddb %xmm2, %xmm0
21918 ; SSE3-NEXT: movdqa %xmm0, %xmm1
21919 ; SSE3-NEXT: psrlw $4, %xmm1
21920 ; SSE3-NEXT: paddb %xmm0, %xmm1
21921 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
21922 ; SSE3-NEXT: pxor %xmm0, %xmm0
21923 ; SSE3-NEXT: psadbw %xmm1, %xmm0
21924 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
21925 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21928 ; SSSE3-LABEL: ugt_18_v2i64:
21930 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21931 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
21932 ; SSSE3-NEXT: pand %xmm1, %xmm2
21933 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21934 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
21935 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
21936 ; SSSE3-NEXT: psrlw $4, %xmm0
21937 ; SSSE3-NEXT: pand %xmm1, %xmm0
21938 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
21939 ; SSSE3-NEXT: paddb %xmm4, %xmm3
21940 ; SSSE3-NEXT: pxor %xmm0, %xmm0
21941 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
21942 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
21943 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21946 ; SSE41-LABEL: ugt_18_v2i64:
21948 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21949 ; SSE41-NEXT: movdqa %xmm0, %xmm2
21950 ; SSE41-NEXT: pand %xmm1, %xmm2
21951 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21952 ; SSE41-NEXT: movdqa %xmm3, %xmm4
21953 ; SSE41-NEXT: pshufb %xmm2, %xmm4
21954 ; SSE41-NEXT: psrlw $4, %xmm0
21955 ; SSE41-NEXT: pand %xmm1, %xmm0
21956 ; SSE41-NEXT: pshufb %xmm0, %xmm3
21957 ; SSE41-NEXT: paddb %xmm4, %xmm3
21958 ; SSE41-NEXT: pxor %xmm0, %xmm0
21959 ; SSE41-NEXT: psadbw %xmm3, %xmm0
21960 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
21961 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21964 ; AVX1-LABEL: ugt_18_v2i64:
21966 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21967 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
21968 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21969 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21970 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
21971 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
21972 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21973 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21974 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
21975 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21976 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21979 ; AVX2-LABEL: ugt_18_v2i64:
21981 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
21982 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
21983 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
21984 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
21985 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
21986 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
21987 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
21988 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
21989 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
21990 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
21991 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21994 ; AVX512VPOPCNTDQ-LABEL: ugt_18_v2i64:
21995 ; AVX512VPOPCNTDQ: # %bb.0:
21996 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
21997 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
21998 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
21999 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
22000 ; AVX512VPOPCNTDQ-NEXT: retq
22002 ; AVX512VPOPCNTDQVL-LABEL: ugt_18_v2i64:
22003 ; AVX512VPOPCNTDQVL: # %bb.0:
22004 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
22005 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18]
22006 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
22007 ; AVX512VPOPCNTDQVL-NEXT: retq
22009 ; BITALG_NOVLX-LABEL: ugt_18_v2i64:
22010 ; BITALG_NOVLX: # %bb.0:
22011 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22012 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
22013 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
22014 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22015 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22016 ; BITALG_NOVLX-NEXT: vzeroupper
22017 ; BITALG_NOVLX-NEXT: retq
22019 ; BITALG-LABEL: ugt_18_v2i64:
22021 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
22022 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
22023 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22024 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18,18]
22025 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
22026 ; BITALG-NEXT: retq
; IR under test - popcount each lane, unsigned-greater-than 18, widen i1 to i64.
22027 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
22028 %3 = icmp ugt <2 x i64> %2, <i64 18, i64 18>
22029 %4 = sext <2 x i1> %3 to <2 x i64>
22033 define <2 x i64> @ult_19_v2i64(<2 x i64> %0) {
22034 ; SSE2-LABEL: ult_19_v2i64:
22036 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22037 ; SSE2-NEXT: psrlw $1, %xmm1
22038 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22039 ; SSE2-NEXT: psubb %xmm1, %xmm0
22040 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22041 ; SSE2-NEXT: movdqa %xmm0, %xmm2
22042 ; SSE2-NEXT: pand %xmm1, %xmm2
22043 ; SSE2-NEXT: psrlw $2, %xmm0
22044 ; SSE2-NEXT: pand %xmm1, %xmm0
22045 ; SSE2-NEXT: paddb %xmm2, %xmm0
22046 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22047 ; SSE2-NEXT: psrlw $4, %xmm1
22048 ; SSE2-NEXT: paddb %xmm0, %xmm1
22049 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22050 ; SSE2-NEXT: pxor %xmm0, %xmm0
22051 ; SSE2-NEXT: psadbw %xmm1, %xmm0
22052 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22053 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19]
22054 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
22057 ; SSE3-LABEL: ult_19_v2i64:
22059 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22060 ; SSE3-NEXT: psrlw $1, %xmm1
22061 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22062 ; SSE3-NEXT: psubb %xmm1, %xmm0
22063 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22064 ; SSE3-NEXT: movdqa %xmm0, %xmm2
22065 ; SSE3-NEXT: pand %xmm1, %xmm2
22066 ; SSE3-NEXT: psrlw $2, %xmm0
22067 ; SSE3-NEXT: pand %xmm1, %xmm0
22068 ; SSE3-NEXT: paddb %xmm2, %xmm0
22069 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22070 ; SSE3-NEXT: psrlw $4, %xmm1
22071 ; SSE3-NEXT: paddb %xmm0, %xmm1
22072 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22073 ; SSE3-NEXT: pxor %xmm0, %xmm0
22074 ; SSE3-NEXT: psadbw %xmm1, %xmm0
22075 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22076 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19]
22077 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
22080 ; SSSE3-LABEL: ult_19_v2i64:
22082 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22083 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
22084 ; SSSE3-NEXT: pand %xmm1, %xmm2
22085 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22086 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
22087 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
22088 ; SSSE3-NEXT: psrlw $4, %xmm0
22089 ; SSSE3-NEXT: pand %xmm1, %xmm0
22090 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
22091 ; SSSE3-NEXT: paddb %xmm4, %xmm3
22092 ; SSSE3-NEXT: pxor %xmm0, %xmm0
22093 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
22094 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22095 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [19,19,19,19]
22096 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
22099 ; SSE41-LABEL: ult_19_v2i64:
22101 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22102 ; SSE41-NEXT: movdqa %xmm0, %xmm2
22103 ; SSE41-NEXT: pand %xmm1, %xmm2
22104 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22105 ; SSE41-NEXT: movdqa %xmm3, %xmm4
22106 ; SSE41-NEXT: pshufb %xmm2, %xmm4
22107 ; SSE41-NEXT: psrlw $4, %xmm0
22108 ; SSE41-NEXT: pand %xmm1, %xmm0
22109 ; SSE41-NEXT: pshufb %xmm0, %xmm3
22110 ; SSE41-NEXT: paddb %xmm4, %xmm3
22111 ; SSE41-NEXT: pxor %xmm0, %xmm0
22112 ; SSE41-NEXT: psadbw %xmm3, %xmm0
22113 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22114 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [19,19,19,19]
22115 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
22118 ; AVX1-LABEL: ult_19_v2i64:
22120 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22121 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
22122 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22123 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22124 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
22125 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
22126 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22127 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22128 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
22129 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22130 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [19,19]
22131 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22134 ; AVX2-LABEL: ult_19_v2i64:
22136 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22137 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
22138 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22139 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22140 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
22141 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
22142 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22143 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22144 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
22145 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22146 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [19,19]
22147 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22150 ; AVX512VPOPCNTDQ-LABEL: ult_19_v2i64:
22151 ; AVX512VPOPCNTDQ: # %bb.0:
22152 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22153 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
22154 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [19,19]
22155 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22156 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
22157 ; AVX512VPOPCNTDQ-NEXT: retq
22159 ; AVX512VPOPCNTDQVL-LABEL: ult_19_v2i64:
22160 ; AVX512VPOPCNTDQVL: # %bb.0:
22161 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
22162 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19]
22163 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22164 ; AVX512VPOPCNTDQVL-NEXT: retq
22166 ; BITALG_NOVLX-LABEL: ult_19_v2i64:
22167 ; BITALG_NOVLX: # %bb.0:
22168 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22169 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
22170 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
22171 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22172 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [19,19]
22173 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22174 ; BITALG_NOVLX-NEXT: vzeroupper
22175 ; BITALG_NOVLX-NEXT: retq
22177 ; BITALG-LABEL: ult_19_v2i64:
22179 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
22180 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
22181 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22182 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19]
22183 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22184 ; BITALG-NEXT: retq
22185 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
22186 %3 = icmp ult <2 x i64> %2, <i64 19, i64 19>
22187 %4 = sext <2 x i1> %3 to <2 x i64>
; Test: per-lane population count of a <2 x i64> vector, compared unsigned
; greater-than against the splat constant 19. The IR at the end of this block
; calls @llvm.ctpop.v2i64 on %0, performs `icmp ugt` against <19, 19>, and
; sign-extends the <2 x i1> result to <2 x i64> (%4), giving an all-ones or
; all-zeros mask per lane.
; The comment lines between the `define` and the IR are the expected assembly
; for each RUN configuration listed in the file header. They are autogenerated
; (see the NOTE on the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
22191 define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) {
22192 ; SSE2-LABEL: ugt_19_v2i64:
22194 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22195 ; SSE2-NEXT: psrlw $1, %xmm1
22196 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22197 ; SSE2-NEXT: psubb %xmm1, %xmm0
22198 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22199 ; SSE2-NEXT: movdqa %xmm0, %xmm2
22200 ; SSE2-NEXT: pand %xmm1, %xmm2
22201 ; SSE2-NEXT: psrlw $2, %xmm0
22202 ; SSE2-NEXT: pand %xmm1, %xmm0
22203 ; SSE2-NEXT: paddb %xmm2, %xmm0
22204 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22205 ; SSE2-NEXT: psrlw $4, %xmm1
22206 ; SSE2-NEXT: paddb %xmm0, %xmm1
22207 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22208 ; SSE2-NEXT: pxor %xmm0, %xmm0
22209 ; SSE2-NEXT: psadbw %xmm1, %xmm0
22210 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
22211 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22214 ; SSE3-LABEL: ugt_19_v2i64:
22216 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22217 ; SSE3-NEXT: psrlw $1, %xmm1
22218 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22219 ; SSE3-NEXT: psubb %xmm1, %xmm0
22220 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22221 ; SSE3-NEXT: movdqa %xmm0, %xmm2
22222 ; SSE3-NEXT: pand %xmm1, %xmm2
22223 ; SSE3-NEXT: psrlw $2, %xmm0
22224 ; SSE3-NEXT: pand %xmm1, %xmm0
22225 ; SSE3-NEXT: paddb %xmm2, %xmm0
22226 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22227 ; SSE3-NEXT: psrlw $4, %xmm1
22228 ; SSE3-NEXT: paddb %xmm0, %xmm1
22229 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22230 ; SSE3-NEXT: pxor %xmm0, %xmm0
22231 ; SSE3-NEXT: psadbw %xmm1, %xmm0
22232 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
22233 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22236 ; SSSE3-LABEL: ugt_19_v2i64:
22238 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22239 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
22240 ; SSSE3-NEXT: pand %xmm1, %xmm2
22241 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22242 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
22243 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
22244 ; SSSE3-NEXT: psrlw $4, %xmm0
22245 ; SSSE3-NEXT: pand %xmm1, %xmm0
22246 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
22247 ; SSSE3-NEXT: paddb %xmm4, %xmm3
22248 ; SSSE3-NEXT: pxor %xmm0, %xmm0
22249 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
22250 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
22251 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22254 ; SSE41-LABEL: ugt_19_v2i64:
22256 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22257 ; SSE41-NEXT: movdqa %xmm0, %xmm2
22258 ; SSE41-NEXT: pand %xmm1, %xmm2
22259 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22260 ; SSE41-NEXT: movdqa %xmm3, %xmm4
22261 ; SSE41-NEXT: pshufb %xmm2, %xmm4
22262 ; SSE41-NEXT: psrlw $4, %xmm0
22263 ; SSE41-NEXT: pand %xmm1, %xmm0
22264 ; SSE41-NEXT: pshufb %xmm0, %xmm3
22265 ; SSE41-NEXT: paddb %xmm4, %xmm3
22266 ; SSE41-NEXT: pxor %xmm0, %xmm0
22267 ; SSE41-NEXT: psadbw %xmm3, %xmm0
22268 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
22269 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22272 ; AVX1-LABEL: ugt_19_v2i64:
22274 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22275 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
22276 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22277 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22278 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
22279 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
22280 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22281 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22282 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
22283 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22284 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22287 ; AVX2-LABEL: ugt_19_v2i64:
22289 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22290 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
22291 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22292 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22293 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
22294 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
22295 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22296 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22297 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
22298 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22299 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22302 ; AVX512VPOPCNTDQ-LABEL: ugt_19_v2i64:
22303 ; AVX512VPOPCNTDQ: # %bb.0:
22304 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22305 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
22306 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22307 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
22308 ; AVX512VPOPCNTDQ-NEXT: retq
22310 ; AVX512VPOPCNTDQVL-LABEL: ugt_19_v2i64:
22311 ; AVX512VPOPCNTDQVL: # %bb.0:
22312 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
22313 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19]
22314 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
22315 ; AVX512VPOPCNTDQVL-NEXT: retq
22317 ; BITALG_NOVLX-LABEL: ugt_19_v2i64:
22318 ; BITALG_NOVLX: # %bb.0:
22319 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22320 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
22321 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
22322 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22323 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22324 ; BITALG_NOVLX-NEXT: vzeroupper
22325 ; BITALG_NOVLX-NEXT: retq
22327 ; BITALG-LABEL: ugt_19_v2i64:
22329 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
22330 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
22331 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22332 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [19,19]
22333 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
22334 ; BITALG-NEXT: retq
22335 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
22336 %3 = icmp ugt <2 x i64> %2, <i64 19, i64 19>
22337 %4 = sext <2 x i1> %3 to <2 x i64>
; Test: per-lane population count of a <2 x i64> vector, compared unsigned
; less-than against the splat constant 20. The IR at the end of this block
; calls @llvm.ctpop.v2i64 on %0, performs `icmp ult` against <20, 20>, and
; sign-extends the <2 x i1> result to <2 x i64> (%4), giving an all-ones or
; all-zeros mask per lane.
; The comment lines between the `define` and the IR are the expected assembly
; for each RUN configuration listed in the file header. They are autogenerated
; (see the NOTE on the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
22341 define <2 x i64> @ult_20_v2i64(<2 x i64> %0) {
22342 ; SSE2-LABEL: ult_20_v2i64:
22344 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22345 ; SSE2-NEXT: psrlw $1, %xmm1
22346 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22347 ; SSE2-NEXT: psubb %xmm1, %xmm0
22348 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22349 ; SSE2-NEXT: movdqa %xmm0, %xmm2
22350 ; SSE2-NEXT: pand %xmm1, %xmm2
22351 ; SSE2-NEXT: psrlw $2, %xmm0
22352 ; SSE2-NEXT: pand %xmm1, %xmm0
22353 ; SSE2-NEXT: paddb %xmm2, %xmm0
22354 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22355 ; SSE2-NEXT: psrlw $4, %xmm1
22356 ; SSE2-NEXT: paddb %xmm0, %xmm1
22357 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22358 ; SSE2-NEXT: pxor %xmm0, %xmm0
22359 ; SSE2-NEXT: psadbw %xmm1, %xmm0
22360 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22361 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20]
22362 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
22365 ; SSE3-LABEL: ult_20_v2i64:
22367 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22368 ; SSE3-NEXT: psrlw $1, %xmm1
22369 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22370 ; SSE3-NEXT: psubb %xmm1, %xmm0
22371 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22372 ; SSE3-NEXT: movdqa %xmm0, %xmm2
22373 ; SSE3-NEXT: pand %xmm1, %xmm2
22374 ; SSE3-NEXT: psrlw $2, %xmm0
22375 ; SSE3-NEXT: pand %xmm1, %xmm0
22376 ; SSE3-NEXT: paddb %xmm2, %xmm0
22377 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22378 ; SSE3-NEXT: psrlw $4, %xmm1
22379 ; SSE3-NEXT: paddb %xmm0, %xmm1
22380 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22381 ; SSE3-NEXT: pxor %xmm0, %xmm0
22382 ; SSE3-NEXT: psadbw %xmm1, %xmm0
22383 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22384 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20]
22385 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
22388 ; SSSE3-LABEL: ult_20_v2i64:
22390 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22391 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
22392 ; SSSE3-NEXT: pand %xmm1, %xmm2
22393 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22394 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
22395 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
22396 ; SSSE3-NEXT: psrlw $4, %xmm0
22397 ; SSSE3-NEXT: pand %xmm1, %xmm0
22398 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
22399 ; SSSE3-NEXT: paddb %xmm4, %xmm3
22400 ; SSSE3-NEXT: pxor %xmm0, %xmm0
22401 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
22402 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22403 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [20,20,20,20]
22404 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
22407 ; SSE41-LABEL: ult_20_v2i64:
22409 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22410 ; SSE41-NEXT: movdqa %xmm0, %xmm2
22411 ; SSE41-NEXT: pand %xmm1, %xmm2
22412 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22413 ; SSE41-NEXT: movdqa %xmm3, %xmm4
22414 ; SSE41-NEXT: pshufb %xmm2, %xmm4
22415 ; SSE41-NEXT: psrlw $4, %xmm0
22416 ; SSE41-NEXT: pand %xmm1, %xmm0
22417 ; SSE41-NEXT: pshufb %xmm0, %xmm3
22418 ; SSE41-NEXT: paddb %xmm4, %xmm3
22419 ; SSE41-NEXT: pxor %xmm0, %xmm0
22420 ; SSE41-NEXT: psadbw %xmm3, %xmm0
22421 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22422 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [20,20,20,20]
22423 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
22426 ; AVX1-LABEL: ult_20_v2i64:
22428 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22429 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
22430 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22431 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22432 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
22433 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
22434 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22435 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22436 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
22437 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22438 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [20,20]
22439 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22442 ; AVX2-LABEL: ult_20_v2i64:
22444 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22445 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
22446 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22447 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22448 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
22449 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
22450 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22451 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22452 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
22453 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22454 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [20,20]
22455 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22458 ; AVX512VPOPCNTDQ-LABEL: ult_20_v2i64:
22459 ; AVX512VPOPCNTDQ: # %bb.0:
22460 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22461 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
22462 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [20,20]
22463 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22464 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
22465 ; AVX512VPOPCNTDQ-NEXT: retq
22467 ; AVX512VPOPCNTDQVL-LABEL: ult_20_v2i64:
22468 ; AVX512VPOPCNTDQVL: # %bb.0:
22469 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
22470 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20]
22471 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22472 ; AVX512VPOPCNTDQVL-NEXT: retq
22474 ; BITALG_NOVLX-LABEL: ult_20_v2i64:
22475 ; BITALG_NOVLX: # %bb.0:
22476 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22477 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
22478 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
22479 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22480 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [20,20]
22481 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22482 ; BITALG_NOVLX-NEXT: vzeroupper
22483 ; BITALG_NOVLX-NEXT: retq
22485 ; BITALG-LABEL: ult_20_v2i64:
22487 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
22488 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
22489 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22490 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20]
22491 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22492 ; BITALG-NEXT: retq
22493 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
22494 %3 = icmp ult <2 x i64> %2, <i64 20, i64 20>
22495 %4 = sext <2 x i1> %3 to <2 x i64>
; Test: per-lane population count of a <2 x i64> vector, compared unsigned
; greater-than against the splat constant 20. The IR at the end of this block
; calls @llvm.ctpop.v2i64 on %0, performs `icmp ugt` against <20, 20>, and
; sign-extends the <2 x i1> result to <2 x i64> (%4), giving an all-ones or
; all-zeros mask per lane.
; The comment lines between the `define` and the IR are the expected assembly
; for each RUN configuration listed in the file header. They are autogenerated
; (see the NOTE on the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
22499 define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) {
22500 ; SSE2-LABEL: ugt_20_v2i64:
22502 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22503 ; SSE2-NEXT: psrlw $1, %xmm1
22504 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22505 ; SSE2-NEXT: psubb %xmm1, %xmm0
22506 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22507 ; SSE2-NEXT: movdqa %xmm0, %xmm2
22508 ; SSE2-NEXT: pand %xmm1, %xmm2
22509 ; SSE2-NEXT: psrlw $2, %xmm0
22510 ; SSE2-NEXT: pand %xmm1, %xmm0
22511 ; SSE2-NEXT: paddb %xmm2, %xmm0
22512 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22513 ; SSE2-NEXT: psrlw $4, %xmm1
22514 ; SSE2-NEXT: paddb %xmm0, %xmm1
22515 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22516 ; SSE2-NEXT: pxor %xmm0, %xmm0
22517 ; SSE2-NEXT: psadbw %xmm1, %xmm0
22518 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
22519 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22522 ; SSE3-LABEL: ugt_20_v2i64:
22524 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22525 ; SSE3-NEXT: psrlw $1, %xmm1
22526 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22527 ; SSE3-NEXT: psubb %xmm1, %xmm0
22528 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22529 ; SSE3-NEXT: movdqa %xmm0, %xmm2
22530 ; SSE3-NEXT: pand %xmm1, %xmm2
22531 ; SSE3-NEXT: psrlw $2, %xmm0
22532 ; SSE3-NEXT: pand %xmm1, %xmm0
22533 ; SSE3-NEXT: paddb %xmm2, %xmm0
22534 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22535 ; SSE3-NEXT: psrlw $4, %xmm1
22536 ; SSE3-NEXT: paddb %xmm0, %xmm1
22537 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22538 ; SSE3-NEXT: pxor %xmm0, %xmm0
22539 ; SSE3-NEXT: psadbw %xmm1, %xmm0
22540 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
22541 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22544 ; SSSE3-LABEL: ugt_20_v2i64:
22546 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22547 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
22548 ; SSSE3-NEXT: pand %xmm1, %xmm2
22549 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22550 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
22551 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
22552 ; SSSE3-NEXT: psrlw $4, %xmm0
22553 ; SSSE3-NEXT: pand %xmm1, %xmm0
22554 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
22555 ; SSSE3-NEXT: paddb %xmm4, %xmm3
22556 ; SSSE3-NEXT: pxor %xmm0, %xmm0
22557 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
22558 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
22559 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22562 ; SSE41-LABEL: ugt_20_v2i64:
22564 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22565 ; SSE41-NEXT: movdqa %xmm0, %xmm2
22566 ; SSE41-NEXT: pand %xmm1, %xmm2
22567 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22568 ; SSE41-NEXT: movdqa %xmm3, %xmm4
22569 ; SSE41-NEXT: pshufb %xmm2, %xmm4
22570 ; SSE41-NEXT: psrlw $4, %xmm0
22571 ; SSE41-NEXT: pand %xmm1, %xmm0
22572 ; SSE41-NEXT: pshufb %xmm0, %xmm3
22573 ; SSE41-NEXT: paddb %xmm4, %xmm3
22574 ; SSE41-NEXT: pxor %xmm0, %xmm0
22575 ; SSE41-NEXT: psadbw %xmm3, %xmm0
22576 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
22577 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22580 ; AVX1-LABEL: ugt_20_v2i64:
22582 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22583 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
22584 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22585 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22586 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
22587 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
22588 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22589 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22590 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
22591 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22592 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22595 ; AVX2-LABEL: ugt_20_v2i64:
22597 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22598 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
22599 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22600 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22601 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
22602 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
22603 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22604 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22605 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
22606 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22607 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22610 ; AVX512VPOPCNTDQ-LABEL: ugt_20_v2i64:
22611 ; AVX512VPOPCNTDQ: # %bb.0:
22612 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22613 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
22614 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22615 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
22616 ; AVX512VPOPCNTDQ-NEXT: retq
22618 ; AVX512VPOPCNTDQVL-LABEL: ugt_20_v2i64:
22619 ; AVX512VPOPCNTDQVL: # %bb.0:
22620 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
22621 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20]
22622 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
22623 ; AVX512VPOPCNTDQVL-NEXT: retq
22625 ; BITALG_NOVLX-LABEL: ugt_20_v2i64:
22626 ; BITALG_NOVLX: # %bb.0:
22627 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22628 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
22629 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
22630 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22631 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22632 ; BITALG_NOVLX-NEXT: vzeroupper
22633 ; BITALG_NOVLX-NEXT: retq
22635 ; BITALG-LABEL: ugt_20_v2i64:
22637 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
22638 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
22639 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22640 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [20,20]
22641 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
22642 ; BITALG-NEXT: retq
22643 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
22644 %3 = icmp ugt <2 x i64> %2, <i64 20, i64 20>
22645 %4 = sext <2 x i1> %3 to <2 x i64>
; Test: per-lane population count of a <2 x i64> vector, compared unsigned
; less-than against the splat constant 21. The IR at the end of this block
; calls @llvm.ctpop.v2i64 on %0, performs `icmp ult` against <21, 21>, and
; sign-extends the <2 x i1> result to <2 x i64> (%4), giving an all-ones or
; all-zeros mask per lane.
; The comment lines between the `define` and the IR are the expected assembly
; for each RUN configuration listed in the file header. They are autogenerated
; (see the NOTE on the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
22649 define <2 x i64> @ult_21_v2i64(<2 x i64> %0) {
22650 ; SSE2-LABEL: ult_21_v2i64:
22652 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22653 ; SSE2-NEXT: psrlw $1, %xmm1
22654 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22655 ; SSE2-NEXT: psubb %xmm1, %xmm0
22656 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22657 ; SSE2-NEXT: movdqa %xmm0, %xmm2
22658 ; SSE2-NEXT: pand %xmm1, %xmm2
22659 ; SSE2-NEXT: psrlw $2, %xmm0
22660 ; SSE2-NEXT: pand %xmm1, %xmm0
22661 ; SSE2-NEXT: paddb %xmm2, %xmm0
22662 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22663 ; SSE2-NEXT: psrlw $4, %xmm1
22664 ; SSE2-NEXT: paddb %xmm0, %xmm1
22665 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22666 ; SSE2-NEXT: pxor %xmm0, %xmm0
22667 ; SSE2-NEXT: psadbw %xmm1, %xmm0
22668 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22669 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21]
22670 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
22673 ; SSE3-LABEL: ult_21_v2i64:
22675 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22676 ; SSE3-NEXT: psrlw $1, %xmm1
22677 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22678 ; SSE3-NEXT: psubb %xmm1, %xmm0
22679 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22680 ; SSE3-NEXT: movdqa %xmm0, %xmm2
22681 ; SSE3-NEXT: pand %xmm1, %xmm2
22682 ; SSE3-NEXT: psrlw $2, %xmm0
22683 ; SSE3-NEXT: pand %xmm1, %xmm0
22684 ; SSE3-NEXT: paddb %xmm2, %xmm0
22685 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22686 ; SSE3-NEXT: psrlw $4, %xmm1
22687 ; SSE3-NEXT: paddb %xmm0, %xmm1
22688 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22689 ; SSE3-NEXT: pxor %xmm0, %xmm0
22690 ; SSE3-NEXT: psadbw %xmm1, %xmm0
22691 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22692 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21]
22693 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
22696 ; SSSE3-LABEL: ult_21_v2i64:
22698 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22699 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
22700 ; SSSE3-NEXT: pand %xmm1, %xmm2
22701 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22702 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
22703 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
22704 ; SSSE3-NEXT: psrlw $4, %xmm0
22705 ; SSSE3-NEXT: pand %xmm1, %xmm0
22706 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
22707 ; SSSE3-NEXT: paddb %xmm4, %xmm3
22708 ; SSSE3-NEXT: pxor %xmm0, %xmm0
22709 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
22710 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22711 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [21,21,21,21]
22712 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
22715 ; SSE41-LABEL: ult_21_v2i64:
22717 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22718 ; SSE41-NEXT: movdqa %xmm0, %xmm2
22719 ; SSE41-NEXT: pand %xmm1, %xmm2
22720 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22721 ; SSE41-NEXT: movdqa %xmm3, %xmm4
22722 ; SSE41-NEXT: pshufb %xmm2, %xmm4
22723 ; SSE41-NEXT: psrlw $4, %xmm0
22724 ; SSE41-NEXT: pand %xmm1, %xmm0
22725 ; SSE41-NEXT: pshufb %xmm0, %xmm3
22726 ; SSE41-NEXT: paddb %xmm4, %xmm3
22727 ; SSE41-NEXT: pxor %xmm0, %xmm0
22728 ; SSE41-NEXT: psadbw %xmm3, %xmm0
22729 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22730 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [21,21,21,21]
22731 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
22734 ; AVX1-LABEL: ult_21_v2i64:
22736 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22737 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
22738 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22739 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22740 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
22741 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
22742 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22743 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22744 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
22745 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22746 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [21,21]
22747 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22750 ; AVX2-LABEL: ult_21_v2i64:
22752 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22753 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
22754 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22755 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22756 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
22757 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
22758 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22759 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22760 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
22761 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22762 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [21,21]
22763 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22766 ; AVX512VPOPCNTDQ-LABEL: ult_21_v2i64:
22767 ; AVX512VPOPCNTDQ: # %bb.0:
22768 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22769 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
22770 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [21,21]
22771 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22772 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
22773 ; AVX512VPOPCNTDQ-NEXT: retq
22775 ; AVX512VPOPCNTDQVL-LABEL: ult_21_v2i64:
22776 ; AVX512VPOPCNTDQVL: # %bb.0:
22777 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
22778 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21]
22779 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22780 ; AVX512VPOPCNTDQVL-NEXT: retq
22782 ; BITALG_NOVLX-LABEL: ult_21_v2i64:
22783 ; BITALG_NOVLX: # %bb.0:
22784 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22785 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
22786 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
22787 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22788 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [21,21]
22789 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22790 ; BITALG_NOVLX-NEXT: vzeroupper
22791 ; BITALG_NOVLX-NEXT: retq
22793 ; BITALG-LABEL: ult_21_v2i64:
22795 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
22796 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
22797 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22798 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21]
22799 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
22800 ; BITALG-NEXT: retq
22801 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
22802 %3 = icmp ult <2 x i64> %2, <i64 21, i64 21>
22803 %4 = sext <2 x i1> %3 to <2 x i64>
; Test: per-lane population count of a <2 x i64> vector, compared unsigned
; greater-than against the splat constant 21. The IR at the end of this block
; calls @llvm.ctpop.v2i64 on %0, performs `icmp ugt` against <21, 21>, and
; sign-extends the <2 x i1> result to <2 x i64> (%4), giving an all-ones or
; all-zeros mask per lane.
; The comment lines between the `define` and the IR are the expected assembly
; for each RUN configuration listed in the file header. They are autogenerated
; (see the NOTE on the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
22807 define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) {
22808 ; SSE2-LABEL: ugt_21_v2i64:
22810 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22811 ; SSE2-NEXT: psrlw $1, %xmm1
22812 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22813 ; SSE2-NEXT: psubb %xmm1, %xmm0
22814 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22815 ; SSE2-NEXT: movdqa %xmm0, %xmm2
22816 ; SSE2-NEXT: pand %xmm1, %xmm2
22817 ; SSE2-NEXT: psrlw $2, %xmm0
22818 ; SSE2-NEXT: pand %xmm1, %xmm0
22819 ; SSE2-NEXT: paddb %xmm2, %xmm0
22820 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22821 ; SSE2-NEXT: psrlw $4, %xmm1
22822 ; SSE2-NEXT: paddb %xmm0, %xmm1
22823 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22824 ; SSE2-NEXT: pxor %xmm0, %xmm0
22825 ; SSE2-NEXT: psadbw %xmm1, %xmm0
22826 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
22827 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22830 ; SSE3-LABEL: ugt_21_v2i64:
22832 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22833 ; SSE3-NEXT: psrlw $1, %xmm1
22834 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22835 ; SSE3-NEXT: psubb %xmm1, %xmm0
22836 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22837 ; SSE3-NEXT: movdqa %xmm0, %xmm2
22838 ; SSE3-NEXT: pand %xmm1, %xmm2
22839 ; SSE3-NEXT: psrlw $2, %xmm0
22840 ; SSE3-NEXT: pand %xmm1, %xmm0
22841 ; SSE3-NEXT: paddb %xmm2, %xmm0
22842 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22843 ; SSE3-NEXT: psrlw $4, %xmm1
22844 ; SSE3-NEXT: paddb %xmm0, %xmm1
22845 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22846 ; SSE3-NEXT: pxor %xmm0, %xmm0
22847 ; SSE3-NEXT: psadbw %xmm1, %xmm0
22848 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
22849 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22852 ; SSSE3-LABEL: ugt_21_v2i64:
22854 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22855 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
22856 ; SSSE3-NEXT: pand %xmm1, %xmm2
22857 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22858 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
22859 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
22860 ; SSSE3-NEXT: psrlw $4, %xmm0
22861 ; SSSE3-NEXT: pand %xmm1, %xmm0
22862 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
22863 ; SSSE3-NEXT: paddb %xmm4, %xmm3
22864 ; SSSE3-NEXT: pxor %xmm0, %xmm0
22865 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
22866 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
22867 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22870 ; SSE41-LABEL: ugt_21_v2i64:
22872 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22873 ; SSE41-NEXT: movdqa %xmm0, %xmm2
22874 ; SSE41-NEXT: pand %xmm1, %xmm2
22875 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22876 ; SSE41-NEXT: movdqa %xmm3, %xmm4
22877 ; SSE41-NEXT: pshufb %xmm2, %xmm4
22878 ; SSE41-NEXT: psrlw $4, %xmm0
22879 ; SSE41-NEXT: pand %xmm1, %xmm0
22880 ; SSE41-NEXT: pshufb %xmm0, %xmm3
22881 ; SSE41-NEXT: paddb %xmm4, %xmm3
22882 ; SSE41-NEXT: pxor %xmm0, %xmm0
22883 ; SSE41-NEXT: psadbw %xmm3, %xmm0
22884 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
22885 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22888 ; AVX1-LABEL: ugt_21_v2i64:
22890 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22891 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
22892 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22893 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22894 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
22895 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
22896 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22897 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22898 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
22899 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22900 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22903 ; AVX2-LABEL: ugt_21_v2i64:
22905 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
22906 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
22907 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
22908 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
22909 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
22910 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
22911 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
22912 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
22913 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
22914 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22915 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22918 ; AVX512VPOPCNTDQ-LABEL: ugt_21_v2i64:
22919 ; AVX512VPOPCNTDQ: # %bb.0:
22920 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22921 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
22922 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22923 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
22924 ; AVX512VPOPCNTDQ-NEXT: retq
22926 ; AVX512VPOPCNTDQVL-LABEL: ugt_21_v2i64:
22927 ; AVX512VPOPCNTDQVL: # %bb.0:
22928 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
22929 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21]
22930 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
22931 ; AVX512VPOPCNTDQVL-NEXT: retq
22933 ; BITALG_NOVLX-LABEL: ugt_21_v2i64:
22934 ; BITALG_NOVLX: # %bb.0:
22935 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
22936 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
22937 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
22938 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22939 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
22940 ; BITALG_NOVLX-NEXT: vzeroupper
22941 ; BITALG_NOVLX-NEXT: retq
22943 ; BITALG-LABEL: ugt_21_v2i64:
22945 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
22946 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
22947 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
22948 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [21,21]
22949 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
22950 ; BITALG-NEXT: retq
22951 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
22952 %3 = icmp ugt <2 x i64> %2, <i64 21, i64 21>
22953 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_22_v2i64 - takes the per-lane population count of a <2 x i64> vector
; with @llvm.ctpop.v2i64, compares every count "unsigned less than" 22 and
; sign-extends the <2 x i1> compare result to <2 x i64>.
; The assertion lines inside this function are autogenerated (see the note on
; the first line of this file) - regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
22957 define <2 x i64> @ult_22_v2i64(<2 x i64> %0) {
22958 ; SSE2-LABEL: ult_22_v2i64:
22960 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22961 ; SSE2-NEXT: psrlw $1, %xmm1
22962 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22963 ; SSE2-NEXT: psubb %xmm1, %xmm0
22964 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22965 ; SSE2-NEXT: movdqa %xmm0, %xmm2
22966 ; SSE2-NEXT: pand %xmm1, %xmm2
22967 ; SSE2-NEXT: psrlw $2, %xmm0
22968 ; SSE2-NEXT: pand %xmm1, %xmm0
22969 ; SSE2-NEXT: paddb %xmm2, %xmm0
22970 ; SSE2-NEXT: movdqa %xmm0, %xmm1
22971 ; SSE2-NEXT: psrlw $4, %xmm1
22972 ; SSE2-NEXT: paddb %xmm0, %xmm1
22973 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22974 ; SSE2-NEXT: pxor %xmm0, %xmm0
22975 ; SSE2-NEXT: psadbw %xmm1, %xmm0
22976 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
22977 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22]
22978 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
22981 ; SSE3-LABEL: ult_22_v2i64:
22983 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22984 ; SSE3-NEXT: psrlw $1, %xmm1
22985 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22986 ; SSE3-NEXT: psubb %xmm1, %xmm0
22987 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
22988 ; SSE3-NEXT: movdqa %xmm0, %xmm2
22989 ; SSE3-NEXT: pand %xmm1, %xmm2
22990 ; SSE3-NEXT: psrlw $2, %xmm0
22991 ; SSE3-NEXT: pand %xmm1, %xmm0
22992 ; SSE3-NEXT: paddb %xmm2, %xmm0
22993 ; SSE3-NEXT: movdqa %xmm0, %xmm1
22994 ; SSE3-NEXT: psrlw $4, %xmm1
22995 ; SSE3-NEXT: paddb %xmm0, %xmm1
22996 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
22997 ; SSE3-NEXT: pxor %xmm0, %xmm0
22998 ; SSE3-NEXT: psadbw %xmm1, %xmm0
22999 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23000 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22]
23001 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
23004 ; SSSE3-LABEL: ult_22_v2i64:
23006 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23007 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
23008 ; SSSE3-NEXT: pand %xmm1, %xmm2
23009 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23010 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
23011 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
23012 ; SSSE3-NEXT: psrlw $4, %xmm0
23013 ; SSSE3-NEXT: pand %xmm1, %xmm0
23014 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
23015 ; SSSE3-NEXT: paddb %xmm4, %xmm3
23016 ; SSSE3-NEXT: pxor %xmm0, %xmm0
23017 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
23018 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23019 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [22,22,22,22]
23020 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
23023 ; SSE41-LABEL: ult_22_v2i64:
23025 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23026 ; SSE41-NEXT: movdqa %xmm0, %xmm2
23027 ; SSE41-NEXT: pand %xmm1, %xmm2
23028 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23029 ; SSE41-NEXT: movdqa %xmm3, %xmm4
23030 ; SSE41-NEXT: pshufb %xmm2, %xmm4
23031 ; SSE41-NEXT: psrlw $4, %xmm0
23032 ; SSE41-NEXT: pand %xmm1, %xmm0
23033 ; SSE41-NEXT: pshufb %xmm0, %xmm3
23034 ; SSE41-NEXT: paddb %xmm4, %xmm3
23035 ; SSE41-NEXT: pxor %xmm0, %xmm0
23036 ; SSE41-NEXT: psadbw %xmm3, %xmm0
23037 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23038 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [22,22,22,22]
23039 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
23042 ; AVX1-LABEL: ult_22_v2i64:
23044 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23045 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
23046 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23047 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23048 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
23049 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
23050 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23051 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23052 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
23053 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23054 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [22,22]
23055 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23058 ; AVX2-LABEL: ult_22_v2i64:
23060 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23061 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
23062 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23063 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23064 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
23065 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
23066 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23067 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23068 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
23069 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23070 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [22,22]
23071 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23074 ; AVX512VPOPCNTDQ-LABEL: ult_22_v2i64:
23075 ; AVX512VPOPCNTDQ: # %bb.0:
23076 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23077 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
23078 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [22,22]
23079 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23080 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
23081 ; AVX512VPOPCNTDQ-NEXT: retq
23083 ; AVX512VPOPCNTDQVL-LABEL: ult_22_v2i64:
23084 ; AVX512VPOPCNTDQVL: # %bb.0:
23085 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
23086 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [22,22]
23087 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23088 ; AVX512VPOPCNTDQVL-NEXT: retq
23090 ; BITALG_NOVLX-LABEL: ult_22_v2i64:
23091 ; BITALG_NOVLX: # %bb.0:
23092 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23093 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
23094 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
23095 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23096 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [22,22]
23097 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23098 ; BITALG_NOVLX-NEXT: vzeroupper
23099 ; BITALG_NOVLX-NEXT: retq
23101 ; BITALG-LABEL: ult_22_v2i64:
23103 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
23104 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
23105 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23106 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [22,22]
23107 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23108 ; BITALG-NEXT: retq
; IR being lowered - popcount each i64 lane, compare it (unsigned <) with the
; splat constant 22, then widen the i1 lanes to i64 by sign extension.
23109 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
23110 %3 = icmp ult <2 x i64> %2, <i64 22, i64 22>
23111 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_22_v2i64 - takes the per-lane population count of a <2 x i64> vector
; with @llvm.ctpop.v2i64, compares every count "unsigned greater than" 22 and
; sign-extends the <2 x i1> compare result to <2 x i64>.
; The assertion lines inside this function are autogenerated (see the note on
; the first line of this file) - regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
23115 define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) {
23116 ; SSE2-LABEL: ugt_22_v2i64:
23118 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23119 ; SSE2-NEXT: psrlw $1, %xmm1
23120 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23121 ; SSE2-NEXT: psubb %xmm1, %xmm0
23122 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23123 ; SSE2-NEXT: movdqa %xmm0, %xmm2
23124 ; SSE2-NEXT: pand %xmm1, %xmm2
23125 ; SSE2-NEXT: psrlw $2, %xmm0
23126 ; SSE2-NEXT: pand %xmm1, %xmm0
23127 ; SSE2-NEXT: paddb %xmm2, %xmm0
23128 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23129 ; SSE2-NEXT: psrlw $4, %xmm1
23130 ; SSE2-NEXT: paddb %xmm0, %xmm1
23131 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23132 ; SSE2-NEXT: pxor %xmm0, %xmm0
23133 ; SSE2-NEXT: psadbw %xmm1, %xmm0
23134 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
23135 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
23138 ; SSE3-LABEL: ugt_22_v2i64:
23140 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23141 ; SSE3-NEXT: psrlw $1, %xmm1
23142 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23143 ; SSE3-NEXT: psubb %xmm1, %xmm0
23144 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23145 ; SSE3-NEXT: movdqa %xmm0, %xmm2
23146 ; SSE3-NEXT: pand %xmm1, %xmm2
23147 ; SSE3-NEXT: psrlw $2, %xmm0
23148 ; SSE3-NEXT: pand %xmm1, %xmm0
23149 ; SSE3-NEXT: paddb %xmm2, %xmm0
23150 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23151 ; SSE3-NEXT: psrlw $4, %xmm1
23152 ; SSE3-NEXT: paddb %xmm0, %xmm1
23153 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23154 ; SSE3-NEXT: pxor %xmm0, %xmm0
23155 ; SSE3-NEXT: psadbw %xmm1, %xmm0
23156 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
23157 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
23160 ; SSSE3-LABEL: ugt_22_v2i64:
23162 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23163 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
23164 ; SSSE3-NEXT: pand %xmm1, %xmm2
23165 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23166 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
23167 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
23168 ; SSSE3-NEXT: psrlw $4, %xmm0
23169 ; SSSE3-NEXT: pand %xmm1, %xmm0
23170 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
23171 ; SSSE3-NEXT: paddb %xmm4, %xmm3
23172 ; SSSE3-NEXT: pxor %xmm0, %xmm0
23173 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
23174 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
23175 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
23178 ; SSE41-LABEL: ugt_22_v2i64:
23180 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23181 ; SSE41-NEXT: movdqa %xmm0, %xmm2
23182 ; SSE41-NEXT: pand %xmm1, %xmm2
23183 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23184 ; SSE41-NEXT: movdqa %xmm3, %xmm4
23185 ; SSE41-NEXT: pshufb %xmm2, %xmm4
23186 ; SSE41-NEXT: psrlw $4, %xmm0
23187 ; SSE41-NEXT: pand %xmm1, %xmm0
23188 ; SSE41-NEXT: pshufb %xmm0, %xmm3
23189 ; SSE41-NEXT: paddb %xmm4, %xmm3
23190 ; SSE41-NEXT: pxor %xmm0, %xmm0
23191 ; SSE41-NEXT: psadbw %xmm3, %xmm0
23192 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
23193 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
23196 ; AVX1-LABEL: ugt_22_v2i64:
23198 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23199 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
23200 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23201 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23202 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
23203 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
23204 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23205 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23206 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
23207 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23208 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23211 ; AVX2-LABEL: ugt_22_v2i64:
23213 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23214 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
23215 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23216 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23217 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
23218 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
23219 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23220 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23221 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
23222 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23223 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23226 ; AVX512VPOPCNTDQ-LABEL: ugt_22_v2i64:
23227 ; AVX512VPOPCNTDQ: # %bb.0:
23228 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23229 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
23230 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23231 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
23232 ; AVX512VPOPCNTDQ-NEXT: retq
23234 ; AVX512VPOPCNTDQVL-LABEL: ugt_22_v2i64:
23235 ; AVX512VPOPCNTDQVL: # %bb.0:
23236 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
23237 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [22,22]
23238 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
23239 ; AVX512VPOPCNTDQVL-NEXT: retq
23241 ; BITALG_NOVLX-LABEL: ugt_22_v2i64:
23242 ; BITALG_NOVLX: # %bb.0:
23243 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23244 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
23245 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
23246 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23247 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23248 ; BITALG_NOVLX-NEXT: vzeroupper
23249 ; BITALG_NOVLX-NEXT: retq
23251 ; BITALG-LABEL: ugt_22_v2i64:
23253 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
23254 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
23255 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23256 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [22,22]
23257 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
23258 ; BITALG-NEXT: retq
; IR being lowered - popcount each i64 lane, compare it (unsigned >) with the
; splat constant 22, then widen the i1 lanes to i64 by sign extension.
23259 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
23260 %3 = icmp ugt <2 x i64> %2, <i64 22, i64 22>
23261 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_23_v2i64 - takes the per-lane population count of a <2 x i64> vector
; with @llvm.ctpop.v2i64, compares every count "unsigned less than" 23 and
; sign-extends the <2 x i1> compare result to <2 x i64>.
; The assertion lines inside this function are autogenerated (see the note on
; the first line of this file) - regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
23265 define <2 x i64> @ult_23_v2i64(<2 x i64> %0) {
23266 ; SSE2-LABEL: ult_23_v2i64:
23268 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23269 ; SSE2-NEXT: psrlw $1, %xmm1
23270 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23271 ; SSE2-NEXT: psubb %xmm1, %xmm0
23272 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23273 ; SSE2-NEXT: movdqa %xmm0, %xmm2
23274 ; SSE2-NEXT: pand %xmm1, %xmm2
23275 ; SSE2-NEXT: psrlw $2, %xmm0
23276 ; SSE2-NEXT: pand %xmm1, %xmm0
23277 ; SSE2-NEXT: paddb %xmm2, %xmm0
23278 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23279 ; SSE2-NEXT: psrlw $4, %xmm1
23280 ; SSE2-NEXT: paddb %xmm0, %xmm1
23281 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23282 ; SSE2-NEXT: pxor %xmm0, %xmm0
23283 ; SSE2-NEXT: psadbw %xmm1, %xmm0
23284 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23285 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23]
23286 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
23289 ; SSE3-LABEL: ult_23_v2i64:
23291 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23292 ; SSE3-NEXT: psrlw $1, %xmm1
23293 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23294 ; SSE3-NEXT: psubb %xmm1, %xmm0
23295 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23296 ; SSE3-NEXT: movdqa %xmm0, %xmm2
23297 ; SSE3-NEXT: pand %xmm1, %xmm2
23298 ; SSE3-NEXT: psrlw $2, %xmm0
23299 ; SSE3-NEXT: pand %xmm1, %xmm0
23300 ; SSE3-NEXT: paddb %xmm2, %xmm0
23301 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23302 ; SSE3-NEXT: psrlw $4, %xmm1
23303 ; SSE3-NEXT: paddb %xmm0, %xmm1
23304 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23305 ; SSE3-NEXT: pxor %xmm0, %xmm0
23306 ; SSE3-NEXT: psadbw %xmm1, %xmm0
23307 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23308 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23]
23309 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
23312 ; SSSE3-LABEL: ult_23_v2i64:
23314 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23315 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
23316 ; SSSE3-NEXT: pand %xmm1, %xmm2
23317 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23318 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
23319 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
23320 ; SSSE3-NEXT: psrlw $4, %xmm0
23321 ; SSSE3-NEXT: pand %xmm1, %xmm0
23322 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
23323 ; SSSE3-NEXT: paddb %xmm4, %xmm3
23324 ; SSSE3-NEXT: pxor %xmm0, %xmm0
23325 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
23326 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23327 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [23,23,23,23]
23328 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
23331 ; SSE41-LABEL: ult_23_v2i64:
23333 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23334 ; SSE41-NEXT: movdqa %xmm0, %xmm2
23335 ; SSE41-NEXT: pand %xmm1, %xmm2
23336 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23337 ; SSE41-NEXT: movdqa %xmm3, %xmm4
23338 ; SSE41-NEXT: pshufb %xmm2, %xmm4
23339 ; SSE41-NEXT: psrlw $4, %xmm0
23340 ; SSE41-NEXT: pand %xmm1, %xmm0
23341 ; SSE41-NEXT: pshufb %xmm0, %xmm3
23342 ; SSE41-NEXT: paddb %xmm4, %xmm3
23343 ; SSE41-NEXT: pxor %xmm0, %xmm0
23344 ; SSE41-NEXT: psadbw %xmm3, %xmm0
23345 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23346 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [23,23,23,23]
23347 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
23350 ; AVX1-LABEL: ult_23_v2i64:
23352 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23353 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
23354 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23355 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23356 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
23357 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
23358 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23359 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23360 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
23361 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23362 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [23,23]
23363 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23366 ; AVX2-LABEL: ult_23_v2i64:
23368 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23369 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
23370 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23371 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23372 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
23373 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
23374 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23375 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23376 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
23377 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23378 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [23,23]
23379 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23382 ; AVX512VPOPCNTDQ-LABEL: ult_23_v2i64:
23383 ; AVX512VPOPCNTDQ: # %bb.0:
23384 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23385 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
23386 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [23,23]
23387 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23388 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
23389 ; AVX512VPOPCNTDQ-NEXT: retq
23391 ; AVX512VPOPCNTDQVL-LABEL: ult_23_v2i64:
23392 ; AVX512VPOPCNTDQVL: # %bb.0:
23393 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
23394 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23]
23395 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23396 ; AVX512VPOPCNTDQVL-NEXT: retq
23398 ; BITALG_NOVLX-LABEL: ult_23_v2i64:
23399 ; BITALG_NOVLX: # %bb.0:
23400 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23401 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
23402 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
23403 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23404 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [23,23]
23405 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23406 ; BITALG_NOVLX-NEXT: vzeroupper
23407 ; BITALG_NOVLX-NEXT: retq
23409 ; BITALG-LABEL: ult_23_v2i64:
23411 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
23412 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
23413 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23414 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23]
23415 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23416 ; BITALG-NEXT: retq
; IR being lowered - popcount each i64 lane, compare it (unsigned <) with the
; splat constant 23, then widen the i1 lanes to i64 by sign extension.
23417 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
23418 %3 = icmp ult <2 x i64> %2, <i64 23, i64 23>
23419 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_23_v2i64 - takes the per-lane population count of a <2 x i64> vector
; with @llvm.ctpop.v2i64, compares every count "unsigned greater than" 23 and
; sign-extends the <2 x i1> compare result to <2 x i64>.
; The assertion lines inside this function are autogenerated (see the note on
; the first line of this file) - regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
23423 define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) {
23424 ; SSE2-LABEL: ugt_23_v2i64:
23426 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23427 ; SSE2-NEXT: psrlw $1, %xmm1
23428 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23429 ; SSE2-NEXT: psubb %xmm1, %xmm0
23430 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23431 ; SSE2-NEXT: movdqa %xmm0, %xmm2
23432 ; SSE2-NEXT: pand %xmm1, %xmm2
23433 ; SSE2-NEXT: psrlw $2, %xmm0
23434 ; SSE2-NEXT: pand %xmm1, %xmm0
23435 ; SSE2-NEXT: paddb %xmm2, %xmm0
23436 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23437 ; SSE2-NEXT: psrlw $4, %xmm1
23438 ; SSE2-NEXT: paddb %xmm0, %xmm1
23439 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23440 ; SSE2-NEXT: pxor %xmm0, %xmm0
23441 ; SSE2-NEXT: psadbw %xmm1, %xmm0
23442 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
23443 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
23446 ; SSE3-LABEL: ugt_23_v2i64:
23448 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23449 ; SSE3-NEXT: psrlw $1, %xmm1
23450 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23451 ; SSE3-NEXT: psubb %xmm1, %xmm0
23452 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23453 ; SSE3-NEXT: movdqa %xmm0, %xmm2
23454 ; SSE3-NEXT: pand %xmm1, %xmm2
23455 ; SSE3-NEXT: psrlw $2, %xmm0
23456 ; SSE3-NEXT: pand %xmm1, %xmm0
23457 ; SSE3-NEXT: paddb %xmm2, %xmm0
23458 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23459 ; SSE3-NEXT: psrlw $4, %xmm1
23460 ; SSE3-NEXT: paddb %xmm0, %xmm1
23461 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23462 ; SSE3-NEXT: pxor %xmm0, %xmm0
23463 ; SSE3-NEXT: psadbw %xmm1, %xmm0
23464 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
23465 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
23468 ; SSSE3-LABEL: ugt_23_v2i64:
23470 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23471 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
23472 ; SSSE3-NEXT: pand %xmm1, %xmm2
23473 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23474 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
23475 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
23476 ; SSSE3-NEXT: psrlw $4, %xmm0
23477 ; SSSE3-NEXT: pand %xmm1, %xmm0
23478 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
23479 ; SSSE3-NEXT: paddb %xmm4, %xmm3
23480 ; SSSE3-NEXT: pxor %xmm0, %xmm0
23481 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
23482 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
23483 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
23486 ; SSE41-LABEL: ugt_23_v2i64:
23488 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23489 ; SSE41-NEXT: movdqa %xmm0, %xmm2
23490 ; SSE41-NEXT: pand %xmm1, %xmm2
23491 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23492 ; SSE41-NEXT: movdqa %xmm3, %xmm4
23493 ; SSE41-NEXT: pshufb %xmm2, %xmm4
23494 ; SSE41-NEXT: psrlw $4, %xmm0
23495 ; SSE41-NEXT: pand %xmm1, %xmm0
23496 ; SSE41-NEXT: pshufb %xmm0, %xmm3
23497 ; SSE41-NEXT: paddb %xmm4, %xmm3
23498 ; SSE41-NEXT: pxor %xmm0, %xmm0
23499 ; SSE41-NEXT: psadbw %xmm3, %xmm0
23500 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
23501 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
23504 ; AVX1-LABEL: ugt_23_v2i64:
23506 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23507 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
23508 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23509 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23510 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
23511 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
23512 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23513 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23514 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
23515 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23516 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23519 ; AVX2-LABEL: ugt_23_v2i64:
23521 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23522 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
23523 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23524 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23525 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
23526 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
23527 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23528 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23529 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
23530 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23531 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23534 ; AVX512VPOPCNTDQ-LABEL: ugt_23_v2i64:
23535 ; AVX512VPOPCNTDQ: # %bb.0:
23536 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23537 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
23538 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23539 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
23540 ; AVX512VPOPCNTDQ-NEXT: retq
23542 ; AVX512VPOPCNTDQVL-LABEL: ugt_23_v2i64:
23543 ; AVX512VPOPCNTDQVL: # %bb.0:
23544 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
23545 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23]
23546 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
23547 ; AVX512VPOPCNTDQVL-NEXT: retq
23549 ; BITALG_NOVLX-LABEL: ugt_23_v2i64:
23550 ; BITALG_NOVLX: # %bb.0:
23551 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23552 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
23553 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
23554 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23555 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23556 ; BITALG_NOVLX-NEXT: vzeroupper
23557 ; BITALG_NOVLX-NEXT: retq
23559 ; BITALG-LABEL: ugt_23_v2i64:
23561 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
23562 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
23563 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23564 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [23,23]
23565 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
23566 ; BITALG-NEXT: retq
; IR being lowered - popcount each i64 lane, compare it (unsigned >) with the
; splat constant 23, then widen the i1 lanes to i64 by sign extension.
23567 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
23568 %3 = icmp ugt <2 x i64> %2, <i64 23, i64 23>
23569 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_24_v2i64 - takes the per-lane population count of a <2 x i64> vector
; with @llvm.ctpop.v2i64, compares every count "unsigned less than" 24 and
; sign-extends the <2 x i1> compare result to <2 x i64>.
; The assertion lines inside this function are autogenerated (see the note on
; the first line of this file) - regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
23573 define <2 x i64> @ult_24_v2i64(<2 x i64> %0) {
23574 ; SSE2-LABEL: ult_24_v2i64:
23576 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23577 ; SSE2-NEXT: psrlw $1, %xmm1
23578 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23579 ; SSE2-NEXT: psubb %xmm1, %xmm0
23580 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23581 ; SSE2-NEXT: movdqa %xmm0, %xmm2
23582 ; SSE2-NEXT: pand %xmm1, %xmm2
23583 ; SSE2-NEXT: psrlw $2, %xmm0
23584 ; SSE2-NEXT: pand %xmm1, %xmm0
23585 ; SSE2-NEXT: paddb %xmm2, %xmm0
23586 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23587 ; SSE2-NEXT: psrlw $4, %xmm1
23588 ; SSE2-NEXT: paddb %xmm0, %xmm1
23589 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23590 ; SSE2-NEXT: pxor %xmm0, %xmm0
23591 ; SSE2-NEXT: psadbw %xmm1, %xmm0
23592 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23593 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24]
23594 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
23597 ; SSE3-LABEL: ult_24_v2i64:
23599 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23600 ; SSE3-NEXT: psrlw $1, %xmm1
23601 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23602 ; SSE3-NEXT: psubb %xmm1, %xmm0
23603 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23604 ; SSE3-NEXT: movdqa %xmm0, %xmm2
23605 ; SSE3-NEXT: pand %xmm1, %xmm2
23606 ; SSE3-NEXT: psrlw $2, %xmm0
23607 ; SSE3-NEXT: pand %xmm1, %xmm0
23608 ; SSE3-NEXT: paddb %xmm2, %xmm0
23609 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23610 ; SSE3-NEXT: psrlw $4, %xmm1
23611 ; SSE3-NEXT: paddb %xmm0, %xmm1
23612 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23613 ; SSE3-NEXT: pxor %xmm0, %xmm0
23614 ; SSE3-NEXT: psadbw %xmm1, %xmm0
23615 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23616 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24]
23617 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
23620 ; SSSE3-LABEL: ult_24_v2i64:
23622 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23623 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
23624 ; SSSE3-NEXT: pand %xmm1, %xmm2
23625 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23626 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
23627 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
23628 ; SSSE3-NEXT: psrlw $4, %xmm0
23629 ; SSSE3-NEXT: pand %xmm1, %xmm0
23630 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
23631 ; SSSE3-NEXT: paddb %xmm4, %xmm3
23632 ; SSSE3-NEXT: pxor %xmm0, %xmm0
23633 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
23634 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23635 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [24,24,24,24]
23636 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
23639 ; SSE41-LABEL: ult_24_v2i64:
23641 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23642 ; SSE41-NEXT: movdqa %xmm0, %xmm2
23643 ; SSE41-NEXT: pand %xmm1, %xmm2
23644 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23645 ; SSE41-NEXT: movdqa %xmm3, %xmm4
23646 ; SSE41-NEXT: pshufb %xmm2, %xmm4
23647 ; SSE41-NEXT: psrlw $4, %xmm0
23648 ; SSE41-NEXT: pand %xmm1, %xmm0
23649 ; SSE41-NEXT: pshufb %xmm0, %xmm3
23650 ; SSE41-NEXT: paddb %xmm4, %xmm3
23651 ; SSE41-NEXT: pxor %xmm0, %xmm0
23652 ; SSE41-NEXT: psadbw %xmm3, %xmm0
23653 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23654 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [24,24,24,24]
23655 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
23658 ; AVX1-LABEL: ult_24_v2i64:
23660 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23661 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
23662 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23663 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23664 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
23665 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
23666 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23667 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23668 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
23669 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23670 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [24,24]
23671 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23674 ; AVX2-LABEL: ult_24_v2i64:
23676 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23677 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
23678 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23679 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23680 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
23681 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
23682 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23683 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23684 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
23685 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23686 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [24,24]
23687 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23690 ; AVX512VPOPCNTDQ-LABEL: ult_24_v2i64:
23691 ; AVX512VPOPCNTDQ: # %bb.0:
23692 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23693 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
23694 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [24,24]
23695 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23696 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
23697 ; AVX512VPOPCNTDQ-NEXT: retq
23699 ; AVX512VPOPCNTDQVL-LABEL: ult_24_v2i64:
23700 ; AVX512VPOPCNTDQVL: # %bb.0:
23701 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
23702 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24]
23703 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23704 ; AVX512VPOPCNTDQVL-NEXT: retq
23706 ; BITALG_NOVLX-LABEL: ult_24_v2i64:
23707 ; BITALG_NOVLX: # %bb.0:
23708 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23709 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
23710 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
23711 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23712 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [24,24]
23713 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23714 ; BITALG_NOVLX-NEXT: vzeroupper
23715 ; BITALG_NOVLX-NEXT: retq
23717 ; BITALG-LABEL: ult_24_v2i64:
23719 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
23720 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
23721 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23722 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24]
23723 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23724 ; BITALG-NEXT: retq
; IR being lowered - popcount each i64 lane, compare it (unsigned <) with the
; splat constant 24, then widen the i1 lanes to i64 by sign extension.
23725 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
23726 %3 = icmp ult <2 x i64> %2, <i64 24, i64 24>
23727 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_24_v2i64: applies llvm.ctpop to a <2 x i64> argument, compares each
; element's bit count with `icmp ugt` against 24, and sign-extends the
; <2 x i1> comparison result to <2 x i64>.
; The prefixed lines that follow (SSE2 through BITALG) are the FileCheck
; expectations for the llc output of each configuration listed at the top of
; the file. Per the NOTE on the first line of the file they are autogenerated
; by utils/update_llc_test_checks.py, so regenerate them rather than editing
; them by hand.
23731 define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) {
23732 ; SSE2-LABEL: ugt_24_v2i64:
23734 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23735 ; SSE2-NEXT: psrlw $1, %xmm1
23736 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23737 ; SSE2-NEXT: psubb %xmm1, %xmm0
23738 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23739 ; SSE2-NEXT: movdqa %xmm0, %xmm2
23740 ; SSE2-NEXT: pand %xmm1, %xmm2
23741 ; SSE2-NEXT: psrlw $2, %xmm0
23742 ; SSE2-NEXT: pand %xmm1, %xmm0
23743 ; SSE2-NEXT: paddb %xmm2, %xmm0
23744 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23745 ; SSE2-NEXT: psrlw $4, %xmm1
23746 ; SSE2-NEXT: paddb %xmm0, %xmm1
23747 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23748 ; SSE2-NEXT: pxor %xmm0, %xmm0
23749 ; SSE2-NEXT: psadbw %xmm1, %xmm0
23750 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
23751 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
23754 ; SSE3-LABEL: ugt_24_v2i64:
23756 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23757 ; SSE3-NEXT: psrlw $1, %xmm1
23758 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23759 ; SSE3-NEXT: psubb %xmm1, %xmm0
23760 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23761 ; SSE3-NEXT: movdqa %xmm0, %xmm2
23762 ; SSE3-NEXT: pand %xmm1, %xmm2
23763 ; SSE3-NEXT: psrlw $2, %xmm0
23764 ; SSE3-NEXT: pand %xmm1, %xmm0
23765 ; SSE3-NEXT: paddb %xmm2, %xmm0
23766 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23767 ; SSE3-NEXT: psrlw $4, %xmm1
23768 ; SSE3-NEXT: paddb %xmm0, %xmm1
23769 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23770 ; SSE3-NEXT: pxor %xmm0, %xmm0
23771 ; SSE3-NEXT: psadbw %xmm1, %xmm0
23772 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
23773 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
23776 ; SSSE3-LABEL: ugt_24_v2i64:
23778 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23779 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
23780 ; SSSE3-NEXT: pand %xmm1, %xmm2
23781 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23782 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
23783 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
23784 ; SSSE3-NEXT: psrlw $4, %xmm0
23785 ; SSSE3-NEXT: pand %xmm1, %xmm0
23786 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
23787 ; SSSE3-NEXT: paddb %xmm4, %xmm3
23788 ; SSSE3-NEXT: pxor %xmm0, %xmm0
23789 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
23790 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
23791 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
23794 ; SSE41-LABEL: ugt_24_v2i64:
23796 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23797 ; SSE41-NEXT: movdqa %xmm0, %xmm2
23798 ; SSE41-NEXT: pand %xmm1, %xmm2
23799 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23800 ; SSE41-NEXT: movdqa %xmm3, %xmm4
23801 ; SSE41-NEXT: pshufb %xmm2, %xmm4
23802 ; SSE41-NEXT: psrlw $4, %xmm0
23803 ; SSE41-NEXT: pand %xmm1, %xmm0
23804 ; SSE41-NEXT: pshufb %xmm0, %xmm3
23805 ; SSE41-NEXT: paddb %xmm4, %xmm3
23806 ; SSE41-NEXT: pxor %xmm0, %xmm0
23807 ; SSE41-NEXT: psadbw %xmm3, %xmm0
23808 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
23809 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
23812 ; AVX1-LABEL: ugt_24_v2i64:
23814 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23815 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
23816 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23817 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23818 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
23819 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
23820 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23821 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23822 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
23823 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23824 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23827 ; AVX2-LABEL: ugt_24_v2i64:
23829 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23830 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
23831 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23832 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23833 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
23834 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
23835 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23836 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23837 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
23838 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23839 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23842 ; AVX512VPOPCNTDQ-LABEL: ugt_24_v2i64:
23843 ; AVX512VPOPCNTDQ: # %bb.0:
23844 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23845 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
23846 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23847 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
23848 ; AVX512VPOPCNTDQ-NEXT: retq
23850 ; AVX512VPOPCNTDQVL-LABEL: ugt_24_v2i64:
23851 ; AVX512VPOPCNTDQVL: # %bb.0:
23852 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
23853 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24]
23854 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
23855 ; AVX512VPOPCNTDQVL-NEXT: retq
23857 ; BITALG_NOVLX-LABEL: ugt_24_v2i64:
23858 ; BITALG_NOVLX: # %bb.0:
23859 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
23860 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
23861 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
23862 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23863 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
23864 ; BITALG_NOVLX-NEXT: vzeroupper
23865 ; BITALG_NOVLX-NEXT: retq
23867 ; BITALG-LABEL: ugt_24_v2i64:
23869 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
23870 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
23871 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23872 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [24,24]
23873 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
23874 ; BITALG-NEXT: retq
; IR under test: ctpop, then an unsigned greater-than compare against a
; constant vector of 24s, then sext of the resulting i1 vector.
23875 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
23876 %3 = icmp ugt <2 x i64> %2, <i64 24, i64 24>
23877 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_25_v2i64: applies llvm.ctpop to a <2 x i64> argument, compares each
; element's bit count with `icmp ult` against 25, and sign-extends the
; <2 x i1> comparison result to <2 x i64>.
; The prefixed lines that follow (SSE2 through BITALG) are the FileCheck
; expectations for the llc output of each configuration listed at the top of
; the file. Per the NOTE on the first line of the file they are autogenerated
; by utils/update_llc_test_checks.py, so regenerate them rather than editing
; them by hand.
23881 define <2 x i64> @ult_25_v2i64(<2 x i64> %0) {
23882 ; SSE2-LABEL: ult_25_v2i64:
23884 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23885 ; SSE2-NEXT: psrlw $1, %xmm1
23886 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23887 ; SSE2-NEXT: psubb %xmm1, %xmm0
23888 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23889 ; SSE2-NEXT: movdqa %xmm0, %xmm2
23890 ; SSE2-NEXT: pand %xmm1, %xmm2
23891 ; SSE2-NEXT: psrlw $2, %xmm0
23892 ; SSE2-NEXT: pand %xmm1, %xmm0
23893 ; SSE2-NEXT: paddb %xmm2, %xmm0
23894 ; SSE2-NEXT: movdqa %xmm0, %xmm1
23895 ; SSE2-NEXT: psrlw $4, %xmm1
23896 ; SSE2-NEXT: paddb %xmm0, %xmm1
23897 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23898 ; SSE2-NEXT: pxor %xmm0, %xmm0
23899 ; SSE2-NEXT: psadbw %xmm1, %xmm0
23900 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23901 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25]
23902 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
23905 ; SSE3-LABEL: ult_25_v2i64:
23907 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23908 ; SSE3-NEXT: psrlw $1, %xmm1
23909 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23910 ; SSE3-NEXT: psubb %xmm1, %xmm0
23911 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
23912 ; SSE3-NEXT: movdqa %xmm0, %xmm2
23913 ; SSE3-NEXT: pand %xmm1, %xmm2
23914 ; SSE3-NEXT: psrlw $2, %xmm0
23915 ; SSE3-NEXT: pand %xmm1, %xmm0
23916 ; SSE3-NEXT: paddb %xmm2, %xmm0
23917 ; SSE3-NEXT: movdqa %xmm0, %xmm1
23918 ; SSE3-NEXT: psrlw $4, %xmm1
23919 ; SSE3-NEXT: paddb %xmm0, %xmm1
23920 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
23921 ; SSE3-NEXT: pxor %xmm0, %xmm0
23922 ; SSE3-NEXT: psadbw %xmm1, %xmm0
23923 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23924 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25]
23925 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
23928 ; SSSE3-LABEL: ult_25_v2i64:
23930 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23931 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
23932 ; SSSE3-NEXT: pand %xmm1, %xmm2
23933 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23934 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
23935 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
23936 ; SSSE3-NEXT: psrlw $4, %xmm0
23937 ; SSSE3-NEXT: pand %xmm1, %xmm0
23938 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
23939 ; SSSE3-NEXT: paddb %xmm4, %xmm3
23940 ; SSSE3-NEXT: pxor %xmm0, %xmm0
23941 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
23942 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23943 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25]
23944 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
23947 ; SSE41-LABEL: ult_25_v2i64:
23949 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23950 ; SSE41-NEXT: movdqa %xmm0, %xmm2
23951 ; SSE41-NEXT: pand %xmm1, %xmm2
23952 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23953 ; SSE41-NEXT: movdqa %xmm3, %xmm4
23954 ; SSE41-NEXT: pshufb %xmm2, %xmm4
23955 ; SSE41-NEXT: psrlw $4, %xmm0
23956 ; SSE41-NEXT: pand %xmm1, %xmm0
23957 ; SSE41-NEXT: pshufb %xmm0, %xmm3
23958 ; SSE41-NEXT: paddb %xmm4, %xmm3
23959 ; SSE41-NEXT: pxor %xmm0, %xmm0
23960 ; SSE41-NEXT: psadbw %xmm3, %xmm0
23961 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
23962 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [25,25,25,25]
23963 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
23966 ; AVX1-LABEL: ult_25_v2i64:
23968 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23969 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
23970 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23971 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23972 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
23973 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
23974 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23975 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23976 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
23977 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23978 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [25,25]
23979 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23982 ; AVX2-LABEL: ult_25_v2i64:
23984 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
23985 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
23986 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
23987 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
23988 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
23989 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
23990 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
23991 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
23992 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
23993 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
23994 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [25,25]
23995 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
23998 ; AVX512VPOPCNTDQ-LABEL: ult_25_v2i64:
23999 ; AVX512VPOPCNTDQ: # %bb.0:
24000 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24001 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
24002 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [25,25]
24003 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24004 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
24005 ; AVX512VPOPCNTDQ-NEXT: retq
24007 ; AVX512VPOPCNTDQVL-LABEL: ult_25_v2i64:
24008 ; AVX512VPOPCNTDQVL: # %bb.0:
24009 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
24010 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [25,25]
24011 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24012 ; AVX512VPOPCNTDQVL-NEXT: retq
24014 ; BITALG_NOVLX-LABEL: ult_25_v2i64:
24015 ; BITALG_NOVLX: # %bb.0:
24016 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24017 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
24018 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
24019 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24020 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [25,25]
24021 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24022 ; BITALG_NOVLX-NEXT: vzeroupper
24023 ; BITALG_NOVLX-NEXT: retq
24025 ; BITALG-LABEL: ult_25_v2i64:
24027 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
24028 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
24029 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24030 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [25,25]
24031 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24032 ; BITALG-NEXT: retq
; IR under test: ctpop, then an unsigned less-than compare against a
; constant vector of 25s, then sext of the resulting i1 vector.
24033 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
24034 %3 = icmp ult <2 x i64> %2, <i64 25, i64 25>
24035 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_25_v2i64: applies llvm.ctpop to a <2 x i64> argument, compares each
; element's bit count with `icmp ugt` against 25, and sign-extends the
; <2 x i1> comparison result to <2 x i64>.
; The prefixed lines that follow (SSE2 through BITALG) are the FileCheck
; expectations for the llc output of each configuration listed at the top of
; the file. Per the NOTE on the first line of the file they are autogenerated
; by utils/update_llc_test_checks.py, so regenerate them rather than editing
; them by hand.
24039 define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) {
24040 ; SSE2-LABEL: ugt_25_v2i64:
24042 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24043 ; SSE2-NEXT: psrlw $1, %xmm1
24044 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24045 ; SSE2-NEXT: psubb %xmm1, %xmm0
24046 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24047 ; SSE2-NEXT: movdqa %xmm0, %xmm2
24048 ; SSE2-NEXT: pand %xmm1, %xmm2
24049 ; SSE2-NEXT: psrlw $2, %xmm0
24050 ; SSE2-NEXT: pand %xmm1, %xmm0
24051 ; SSE2-NEXT: paddb %xmm2, %xmm0
24052 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24053 ; SSE2-NEXT: psrlw $4, %xmm1
24054 ; SSE2-NEXT: paddb %xmm0, %xmm1
24055 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24056 ; SSE2-NEXT: pxor %xmm0, %xmm0
24057 ; SSE2-NEXT: psadbw %xmm1, %xmm0
24058 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
24059 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
24062 ; SSE3-LABEL: ugt_25_v2i64:
24064 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24065 ; SSE3-NEXT: psrlw $1, %xmm1
24066 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24067 ; SSE3-NEXT: psubb %xmm1, %xmm0
24068 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24069 ; SSE3-NEXT: movdqa %xmm0, %xmm2
24070 ; SSE3-NEXT: pand %xmm1, %xmm2
24071 ; SSE3-NEXT: psrlw $2, %xmm0
24072 ; SSE3-NEXT: pand %xmm1, %xmm0
24073 ; SSE3-NEXT: paddb %xmm2, %xmm0
24074 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24075 ; SSE3-NEXT: psrlw $4, %xmm1
24076 ; SSE3-NEXT: paddb %xmm0, %xmm1
24077 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24078 ; SSE3-NEXT: pxor %xmm0, %xmm0
24079 ; SSE3-NEXT: psadbw %xmm1, %xmm0
24080 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
24081 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
24084 ; SSSE3-LABEL: ugt_25_v2i64:
24086 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24087 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
24088 ; SSSE3-NEXT: pand %xmm1, %xmm2
24089 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24090 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
24091 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
24092 ; SSSE3-NEXT: psrlw $4, %xmm0
24093 ; SSSE3-NEXT: pand %xmm1, %xmm0
24094 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
24095 ; SSSE3-NEXT: paddb %xmm4, %xmm3
24096 ; SSSE3-NEXT: pxor %xmm0, %xmm0
24097 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
24098 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
24099 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
24102 ; SSE41-LABEL: ugt_25_v2i64:
24104 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24105 ; SSE41-NEXT: movdqa %xmm0, %xmm2
24106 ; SSE41-NEXT: pand %xmm1, %xmm2
24107 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24108 ; SSE41-NEXT: movdqa %xmm3, %xmm4
24109 ; SSE41-NEXT: pshufb %xmm2, %xmm4
24110 ; SSE41-NEXT: psrlw $4, %xmm0
24111 ; SSE41-NEXT: pand %xmm1, %xmm0
24112 ; SSE41-NEXT: pshufb %xmm0, %xmm3
24113 ; SSE41-NEXT: paddb %xmm4, %xmm3
24114 ; SSE41-NEXT: pxor %xmm0, %xmm0
24115 ; SSE41-NEXT: psadbw %xmm3, %xmm0
24116 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
24117 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
24120 ; AVX1-LABEL: ugt_25_v2i64:
24122 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24123 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
24124 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24125 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24126 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
24127 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
24128 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24129 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24130 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
24131 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24132 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24135 ; AVX2-LABEL: ugt_25_v2i64:
24137 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24138 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
24139 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24140 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24141 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
24142 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
24143 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24144 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24145 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
24146 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24147 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24150 ; AVX512VPOPCNTDQ-LABEL: ugt_25_v2i64:
24151 ; AVX512VPOPCNTDQ: # %bb.0:
24152 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24153 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
24154 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24155 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
24156 ; AVX512VPOPCNTDQ-NEXT: retq
24158 ; AVX512VPOPCNTDQVL-LABEL: ugt_25_v2i64:
24159 ; AVX512VPOPCNTDQVL: # %bb.0:
24160 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
24161 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [25,25]
24162 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
24163 ; AVX512VPOPCNTDQVL-NEXT: retq
24165 ; BITALG_NOVLX-LABEL: ugt_25_v2i64:
24166 ; BITALG_NOVLX: # %bb.0:
24167 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24168 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
24169 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
24170 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24171 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24172 ; BITALG_NOVLX-NEXT: vzeroupper
24173 ; BITALG_NOVLX-NEXT: retq
24175 ; BITALG-LABEL: ugt_25_v2i64:
24177 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
24178 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
24179 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24180 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [25,25]
24181 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
24182 ; BITALG-NEXT: retq
; IR under test: ctpop, then an unsigned greater-than compare against a
; constant vector of 25s, then sext of the resulting i1 vector.
24183 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
24184 %3 = icmp ugt <2 x i64> %2, <i64 25, i64 25>
24185 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_26_v2i64: applies llvm.ctpop to a <2 x i64> argument, compares each
; element's bit count with `icmp ult` against 26, and sign-extends the
; <2 x i1> comparison result to <2 x i64>.
; The prefixed lines that follow (SSE2 through BITALG) are the FileCheck
; expectations for the llc output of each configuration listed at the top of
; the file. Per the NOTE on the first line of the file they are autogenerated
; by utils/update_llc_test_checks.py, so regenerate them rather than editing
; them by hand.
24189 define <2 x i64> @ult_26_v2i64(<2 x i64> %0) {
24190 ; SSE2-LABEL: ult_26_v2i64:
24192 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24193 ; SSE2-NEXT: psrlw $1, %xmm1
24194 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24195 ; SSE2-NEXT: psubb %xmm1, %xmm0
24196 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24197 ; SSE2-NEXT: movdqa %xmm0, %xmm2
24198 ; SSE2-NEXT: pand %xmm1, %xmm2
24199 ; SSE2-NEXT: psrlw $2, %xmm0
24200 ; SSE2-NEXT: pand %xmm1, %xmm0
24201 ; SSE2-NEXT: paddb %xmm2, %xmm0
24202 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24203 ; SSE2-NEXT: psrlw $4, %xmm1
24204 ; SSE2-NEXT: paddb %xmm0, %xmm1
24205 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24206 ; SSE2-NEXT: pxor %xmm0, %xmm0
24207 ; SSE2-NEXT: psadbw %xmm1, %xmm0
24208 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24209 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26]
24210 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
24213 ; SSE3-LABEL: ult_26_v2i64:
24215 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24216 ; SSE3-NEXT: psrlw $1, %xmm1
24217 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24218 ; SSE3-NEXT: psubb %xmm1, %xmm0
24219 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24220 ; SSE3-NEXT: movdqa %xmm0, %xmm2
24221 ; SSE3-NEXT: pand %xmm1, %xmm2
24222 ; SSE3-NEXT: psrlw $2, %xmm0
24223 ; SSE3-NEXT: pand %xmm1, %xmm0
24224 ; SSE3-NEXT: paddb %xmm2, %xmm0
24225 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24226 ; SSE3-NEXT: psrlw $4, %xmm1
24227 ; SSE3-NEXT: paddb %xmm0, %xmm1
24228 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24229 ; SSE3-NEXT: pxor %xmm0, %xmm0
24230 ; SSE3-NEXT: psadbw %xmm1, %xmm0
24231 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24232 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26]
24233 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
24236 ; SSSE3-LABEL: ult_26_v2i64:
24238 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24239 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
24240 ; SSSE3-NEXT: pand %xmm1, %xmm2
24241 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24242 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
24243 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
24244 ; SSSE3-NEXT: psrlw $4, %xmm0
24245 ; SSSE3-NEXT: pand %xmm1, %xmm0
24246 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
24247 ; SSSE3-NEXT: paddb %xmm4, %xmm3
24248 ; SSSE3-NEXT: pxor %xmm0, %xmm0
24249 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
24250 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24251 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26]
24252 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
24255 ; SSE41-LABEL: ult_26_v2i64:
24257 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24258 ; SSE41-NEXT: movdqa %xmm0, %xmm2
24259 ; SSE41-NEXT: pand %xmm1, %xmm2
24260 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24261 ; SSE41-NEXT: movdqa %xmm3, %xmm4
24262 ; SSE41-NEXT: pshufb %xmm2, %xmm4
24263 ; SSE41-NEXT: psrlw $4, %xmm0
24264 ; SSE41-NEXT: pand %xmm1, %xmm0
24265 ; SSE41-NEXT: pshufb %xmm0, %xmm3
24266 ; SSE41-NEXT: paddb %xmm4, %xmm3
24267 ; SSE41-NEXT: pxor %xmm0, %xmm0
24268 ; SSE41-NEXT: psadbw %xmm3, %xmm0
24269 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24270 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [26,26,26,26]
24271 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
24274 ; AVX1-LABEL: ult_26_v2i64:
24276 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24277 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
24278 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24279 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24280 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
24281 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
24282 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24283 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24284 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
24285 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24286 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [26,26]
24287 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24290 ; AVX2-LABEL: ult_26_v2i64:
24292 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24293 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
24294 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24295 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24296 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
24297 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
24298 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24299 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24300 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
24301 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24302 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [26,26]
24303 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24306 ; AVX512VPOPCNTDQ-LABEL: ult_26_v2i64:
24307 ; AVX512VPOPCNTDQ: # %bb.0:
24308 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24309 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
24310 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [26,26]
24311 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24312 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
24313 ; AVX512VPOPCNTDQ-NEXT: retq
24315 ; AVX512VPOPCNTDQVL-LABEL: ult_26_v2i64:
24316 ; AVX512VPOPCNTDQVL: # %bb.0:
24317 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
24318 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26]
24319 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24320 ; AVX512VPOPCNTDQVL-NEXT: retq
24322 ; BITALG_NOVLX-LABEL: ult_26_v2i64:
24323 ; BITALG_NOVLX: # %bb.0:
24324 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24325 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
24326 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
24327 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24328 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [26,26]
24329 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24330 ; BITALG_NOVLX-NEXT: vzeroupper
24331 ; BITALG_NOVLX-NEXT: retq
24333 ; BITALG-LABEL: ult_26_v2i64:
24335 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
24336 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
24337 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24338 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26]
24339 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24340 ; BITALG-NEXT: retq
; IR under test: ctpop, then an unsigned less-than compare against a
; constant vector of 26s, then sext of the resulting i1 vector.
24341 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
24342 %3 = icmp ult <2 x i64> %2, <i64 26, i64 26>
24343 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_26_v2i64: applies llvm.ctpop to a <2 x i64> argument, compares each
; element's bit count with `icmp ugt` against 26, and sign-extends the
; <2 x i1> comparison result to <2 x i64>.
; The prefixed lines that follow (SSE2 through BITALG) are the FileCheck
; expectations for the llc output of each configuration listed at the top of
; the file. Per the NOTE on the first line of the file they are autogenerated
; by utils/update_llc_test_checks.py, so regenerate them rather than editing
; them by hand.
24347 define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) {
24348 ; SSE2-LABEL: ugt_26_v2i64:
24350 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24351 ; SSE2-NEXT: psrlw $1, %xmm1
24352 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24353 ; SSE2-NEXT: psubb %xmm1, %xmm0
24354 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24355 ; SSE2-NEXT: movdqa %xmm0, %xmm2
24356 ; SSE2-NEXT: pand %xmm1, %xmm2
24357 ; SSE2-NEXT: psrlw $2, %xmm0
24358 ; SSE2-NEXT: pand %xmm1, %xmm0
24359 ; SSE2-NEXT: paddb %xmm2, %xmm0
24360 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24361 ; SSE2-NEXT: psrlw $4, %xmm1
24362 ; SSE2-NEXT: paddb %xmm0, %xmm1
24363 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24364 ; SSE2-NEXT: pxor %xmm0, %xmm0
24365 ; SSE2-NEXT: psadbw %xmm1, %xmm0
24366 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
24367 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
24370 ; SSE3-LABEL: ugt_26_v2i64:
24372 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24373 ; SSE3-NEXT: psrlw $1, %xmm1
24374 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24375 ; SSE3-NEXT: psubb %xmm1, %xmm0
24376 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24377 ; SSE3-NEXT: movdqa %xmm0, %xmm2
24378 ; SSE3-NEXT: pand %xmm1, %xmm2
24379 ; SSE3-NEXT: psrlw $2, %xmm0
24380 ; SSE3-NEXT: pand %xmm1, %xmm0
24381 ; SSE3-NEXT: paddb %xmm2, %xmm0
24382 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24383 ; SSE3-NEXT: psrlw $4, %xmm1
24384 ; SSE3-NEXT: paddb %xmm0, %xmm1
24385 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24386 ; SSE3-NEXT: pxor %xmm0, %xmm0
24387 ; SSE3-NEXT: psadbw %xmm1, %xmm0
24388 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
24389 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
24392 ; SSSE3-LABEL: ugt_26_v2i64:
24394 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24395 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
24396 ; SSSE3-NEXT: pand %xmm1, %xmm2
24397 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24398 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
24399 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
24400 ; SSSE3-NEXT: psrlw $4, %xmm0
24401 ; SSSE3-NEXT: pand %xmm1, %xmm0
24402 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
24403 ; SSSE3-NEXT: paddb %xmm4, %xmm3
24404 ; SSSE3-NEXT: pxor %xmm0, %xmm0
24405 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
24406 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
24407 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
24410 ; SSE41-LABEL: ugt_26_v2i64:
24412 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24413 ; SSE41-NEXT: movdqa %xmm0, %xmm2
24414 ; SSE41-NEXT: pand %xmm1, %xmm2
24415 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24416 ; SSE41-NEXT: movdqa %xmm3, %xmm4
24417 ; SSE41-NEXT: pshufb %xmm2, %xmm4
24418 ; SSE41-NEXT: psrlw $4, %xmm0
24419 ; SSE41-NEXT: pand %xmm1, %xmm0
24420 ; SSE41-NEXT: pshufb %xmm0, %xmm3
24421 ; SSE41-NEXT: paddb %xmm4, %xmm3
24422 ; SSE41-NEXT: pxor %xmm0, %xmm0
24423 ; SSE41-NEXT: psadbw %xmm3, %xmm0
24424 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
24425 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
24428 ; AVX1-LABEL: ugt_26_v2i64:
24430 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24431 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
24432 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24433 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24434 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
24435 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
24436 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24437 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24438 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
24439 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24440 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24443 ; AVX2-LABEL: ugt_26_v2i64:
24445 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24446 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
24447 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24448 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24449 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
24450 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
24451 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24452 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24453 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
24454 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24455 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24458 ; AVX512VPOPCNTDQ-LABEL: ugt_26_v2i64:
24459 ; AVX512VPOPCNTDQ: # %bb.0:
24460 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24461 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
24462 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24463 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
24464 ; AVX512VPOPCNTDQ-NEXT: retq
24466 ; AVX512VPOPCNTDQVL-LABEL: ugt_26_v2i64:
24467 ; AVX512VPOPCNTDQVL: # %bb.0:
24468 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
24469 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26]
24470 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
24471 ; AVX512VPOPCNTDQVL-NEXT: retq
24473 ; BITALG_NOVLX-LABEL: ugt_26_v2i64:
24474 ; BITALG_NOVLX: # %bb.0:
24475 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24476 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
24477 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
24478 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24479 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24480 ; BITALG_NOVLX-NEXT: vzeroupper
24481 ; BITALG_NOVLX-NEXT: retq
24483 ; BITALG-LABEL: ugt_26_v2i64:
24485 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
24486 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
24487 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24488 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [26,26]
24489 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
24490 ; BITALG-NEXT: retq
; IR under test: ctpop, then an unsigned greater-than compare against a
; constant vector of 26s, then sext of the resulting i1 vector.
24491 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
24492 %3 = icmp ugt <2 x i64> %2, <i64 26, i64 26>
24493 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_27_v2i64: for each 64-bit lane of %0, tests ctpop(lane) u< 27 and
; sign-extends the i1 result to i64 (all-ones when true, zero when false).
; The assertion lines inside this function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script instead of editing them by hand.
24497 define <2 x i64> @ult_27_v2i64(<2 x i64> %0) {
24498 ; SSE2-LABEL: ult_27_v2i64:
24500 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24501 ; SSE2-NEXT: psrlw $1, %xmm1
24502 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24503 ; SSE2-NEXT: psubb %xmm1, %xmm0
24504 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24505 ; SSE2-NEXT: movdqa %xmm0, %xmm2
24506 ; SSE2-NEXT: pand %xmm1, %xmm2
24507 ; SSE2-NEXT: psrlw $2, %xmm0
24508 ; SSE2-NEXT: pand %xmm1, %xmm0
24509 ; SSE2-NEXT: paddb %xmm2, %xmm0
24510 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24511 ; SSE2-NEXT: psrlw $4, %xmm1
24512 ; SSE2-NEXT: paddb %xmm0, %xmm1
24513 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24514 ; SSE2-NEXT: pxor %xmm0, %xmm0
24515 ; SSE2-NEXT: psadbw %xmm1, %xmm0
24516 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24517 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27]
24518 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
24521 ; SSE3-LABEL: ult_27_v2i64:
24523 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24524 ; SSE3-NEXT: psrlw $1, %xmm1
24525 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24526 ; SSE3-NEXT: psubb %xmm1, %xmm0
24527 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24528 ; SSE3-NEXT: movdqa %xmm0, %xmm2
24529 ; SSE3-NEXT: pand %xmm1, %xmm2
24530 ; SSE3-NEXT: psrlw $2, %xmm0
24531 ; SSE3-NEXT: pand %xmm1, %xmm0
24532 ; SSE3-NEXT: paddb %xmm2, %xmm0
24533 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24534 ; SSE3-NEXT: psrlw $4, %xmm1
24535 ; SSE3-NEXT: paddb %xmm0, %xmm1
24536 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24537 ; SSE3-NEXT: pxor %xmm0, %xmm0
24538 ; SSE3-NEXT: psadbw %xmm1, %xmm0
24539 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24540 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27]
24541 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
24544 ; SSSE3-LABEL: ult_27_v2i64:
24546 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24547 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
24548 ; SSSE3-NEXT: pand %xmm1, %xmm2
24549 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24550 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
24551 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
24552 ; SSSE3-NEXT: psrlw $4, %xmm0
24553 ; SSSE3-NEXT: pand %xmm1, %xmm0
24554 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
24555 ; SSSE3-NEXT: paddb %xmm4, %xmm3
24556 ; SSSE3-NEXT: pxor %xmm0, %xmm0
24557 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
24558 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24559 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [27,27,27,27]
24560 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
24563 ; SSE41-LABEL: ult_27_v2i64:
24565 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24566 ; SSE41-NEXT: movdqa %xmm0, %xmm2
24567 ; SSE41-NEXT: pand %xmm1, %xmm2
24568 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24569 ; SSE41-NEXT: movdqa %xmm3, %xmm4
24570 ; SSE41-NEXT: pshufb %xmm2, %xmm4
24571 ; SSE41-NEXT: psrlw $4, %xmm0
24572 ; SSE41-NEXT: pand %xmm1, %xmm0
24573 ; SSE41-NEXT: pshufb %xmm0, %xmm3
24574 ; SSE41-NEXT: paddb %xmm4, %xmm3
24575 ; SSE41-NEXT: pxor %xmm0, %xmm0
24576 ; SSE41-NEXT: psadbw %xmm3, %xmm0
24577 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24578 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [27,27,27,27]
24579 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
24582 ; AVX1-LABEL: ult_27_v2i64:
24584 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24585 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
24586 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24587 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24588 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
24589 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
24590 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24591 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24592 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
24593 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24594 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [27,27]
24595 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24598 ; AVX2-LABEL: ult_27_v2i64:
24600 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24601 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
24602 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24603 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24604 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
24605 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
24606 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24607 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24608 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
24609 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24610 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [27,27]
24611 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24614 ; AVX512VPOPCNTDQ-LABEL: ult_27_v2i64:
24615 ; AVX512VPOPCNTDQ: # %bb.0:
24616 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24617 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
24618 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [27,27]
24619 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24620 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
24621 ; AVX512VPOPCNTDQ-NEXT: retq
24623 ; AVX512VPOPCNTDQVL-LABEL: ult_27_v2i64:
24624 ; AVX512VPOPCNTDQVL: # %bb.0:
24625 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
24626 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27]
24627 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24628 ; AVX512VPOPCNTDQVL-NEXT: retq
24630 ; BITALG_NOVLX-LABEL: ult_27_v2i64:
24631 ; BITALG_NOVLX: # %bb.0:
24632 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24633 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
24634 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
24635 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24636 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [27,27]
24637 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24638 ; BITALG_NOVLX-NEXT: vzeroupper
24639 ; BITALG_NOVLX-NEXT: retq
24641 ; BITALG-LABEL: ult_27_v2i64:
24643 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
24644 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
24645 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24646 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27]
24647 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24648 ; BITALG-NEXT: retq
24649 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
24650 %3 = icmp ult <2 x i64> %2, <i64 27, i64 27>
24651 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_27_v2i64: for each 64-bit lane of %0, tests ctpop(lane) u> 27 and
; sign-extends the i1 result to i64 (all-ones when true, zero when false).
; The assertion lines inside this function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script instead of editing them by hand.
24655 define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) {
24656 ; SSE2-LABEL: ugt_27_v2i64:
24658 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24659 ; SSE2-NEXT: psrlw $1, %xmm1
24660 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24661 ; SSE2-NEXT: psubb %xmm1, %xmm0
24662 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24663 ; SSE2-NEXT: movdqa %xmm0, %xmm2
24664 ; SSE2-NEXT: pand %xmm1, %xmm2
24665 ; SSE2-NEXT: psrlw $2, %xmm0
24666 ; SSE2-NEXT: pand %xmm1, %xmm0
24667 ; SSE2-NEXT: paddb %xmm2, %xmm0
24668 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24669 ; SSE2-NEXT: psrlw $4, %xmm1
24670 ; SSE2-NEXT: paddb %xmm0, %xmm1
24671 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24672 ; SSE2-NEXT: pxor %xmm0, %xmm0
24673 ; SSE2-NEXT: psadbw %xmm1, %xmm0
24674 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
24675 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
24678 ; SSE3-LABEL: ugt_27_v2i64:
24680 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24681 ; SSE3-NEXT: psrlw $1, %xmm1
24682 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24683 ; SSE3-NEXT: psubb %xmm1, %xmm0
24684 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24685 ; SSE3-NEXT: movdqa %xmm0, %xmm2
24686 ; SSE3-NEXT: pand %xmm1, %xmm2
24687 ; SSE3-NEXT: psrlw $2, %xmm0
24688 ; SSE3-NEXT: pand %xmm1, %xmm0
24689 ; SSE3-NEXT: paddb %xmm2, %xmm0
24690 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24691 ; SSE3-NEXT: psrlw $4, %xmm1
24692 ; SSE3-NEXT: paddb %xmm0, %xmm1
24693 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24694 ; SSE3-NEXT: pxor %xmm0, %xmm0
24695 ; SSE3-NEXT: psadbw %xmm1, %xmm0
24696 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
24697 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
24700 ; SSSE3-LABEL: ugt_27_v2i64:
24702 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24703 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
24704 ; SSSE3-NEXT: pand %xmm1, %xmm2
24705 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24706 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
24707 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
24708 ; SSSE3-NEXT: psrlw $4, %xmm0
24709 ; SSSE3-NEXT: pand %xmm1, %xmm0
24710 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
24711 ; SSSE3-NEXT: paddb %xmm4, %xmm3
24712 ; SSSE3-NEXT: pxor %xmm0, %xmm0
24713 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
24714 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
24715 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
24718 ; SSE41-LABEL: ugt_27_v2i64:
24720 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24721 ; SSE41-NEXT: movdqa %xmm0, %xmm2
24722 ; SSE41-NEXT: pand %xmm1, %xmm2
24723 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24724 ; SSE41-NEXT: movdqa %xmm3, %xmm4
24725 ; SSE41-NEXT: pshufb %xmm2, %xmm4
24726 ; SSE41-NEXT: psrlw $4, %xmm0
24727 ; SSE41-NEXT: pand %xmm1, %xmm0
24728 ; SSE41-NEXT: pshufb %xmm0, %xmm3
24729 ; SSE41-NEXT: paddb %xmm4, %xmm3
24730 ; SSE41-NEXT: pxor %xmm0, %xmm0
24731 ; SSE41-NEXT: psadbw %xmm3, %xmm0
24732 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
24733 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
24736 ; AVX1-LABEL: ugt_27_v2i64:
24738 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24739 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
24740 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24741 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24742 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
24743 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
24744 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24745 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24746 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
24747 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24748 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24751 ; AVX2-LABEL: ugt_27_v2i64:
24753 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24754 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
24755 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24756 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24757 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
24758 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
24759 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24760 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24761 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
24762 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24763 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24766 ; AVX512VPOPCNTDQ-LABEL: ugt_27_v2i64:
24767 ; AVX512VPOPCNTDQ: # %bb.0:
24768 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24769 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
24770 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24771 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
24772 ; AVX512VPOPCNTDQ-NEXT: retq
24774 ; AVX512VPOPCNTDQVL-LABEL: ugt_27_v2i64:
24775 ; AVX512VPOPCNTDQVL: # %bb.0:
24776 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
24777 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27]
24778 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
24779 ; AVX512VPOPCNTDQVL-NEXT: retq
24781 ; BITALG_NOVLX-LABEL: ugt_27_v2i64:
24782 ; BITALG_NOVLX: # %bb.0:
24783 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24784 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
24785 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
24786 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24787 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24788 ; BITALG_NOVLX-NEXT: vzeroupper
24789 ; BITALG_NOVLX-NEXT: retq
24791 ; BITALG-LABEL: ugt_27_v2i64:
24793 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
24794 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
24795 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24796 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [27,27]
24797 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
24798 ; BITALG-NEXT: retq
24799 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
24800 %3 = icmp ugt <2 x i64> %2, <i64 27, i64 27>
24801 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_28_v2i64: for each 64-bit lane of %0, tests ctpop(lane) u< 28 and
; sign-extends the i1 result to i64 (all-ones when true, zero when false).
; The assertion lines inside this function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script instead of editing them by hand.
24805 define <2 x i64> @ult_28_v2i64(<2 x i64> %0) {
24806 ; SSE2-LABEL: ult_28_v2i64:
24808 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24809 ; SSE2-NEXT: psrlw $1, %xmm1
24810 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24811 ; SSE2-NEXT: psubb %xmm1, %xmm0
24812 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24813 ; SSE2-NEXT: movdqa %xmm0, %xmm2
24814 ; SSE2-NEXT: pand %xmm1, %xmm2
24815 ; SSE2-NEXT: psrlw $2, %xmm0
24816 ; SSE2-NEXT: pand %xmm1, %xmm0
24817 ; SSE2-NEXT: paddb %xmm2, %xmm0
24818 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24819 ; SSE2-NEXT: psrlw $4, %xmm1
24820 ; SSE2-NEXT: paddb %xmm0, %xmm1
24821 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24822 ; SSE2-NEXT: pxor %xmm0, %xmm0
24823 ; SSE2-NEXT: psadbw %xmm1, %xmm0
24824 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24825 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28]
24826 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
24829 ; SSE3-LABEL: ult_28_v2i64:
24831 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24832 ; SSE3-NEXT: psrlw $1, %xmm1
24833 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24834 ; SSE3-NEXT: psubb %xmm1, %xmm0
24835 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24836 ; SSE3-NEXT: movdqa %xmm0, %xmm2
24837 ; SSE3-NEXT: pand %xmm1, %xmm2
24838 ; SSE3-NEXT: psrlw $2, %xmm0
24839 ; SSE3-NEXT: pand %xmm1, %xmm0
24840 ; SSE3-NEXT: paddb %xmm2, %xmm0
24841 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24842 ; SSE3-NEXT: psrlw $4, %xmm1
24843 ; SSE3-NEXT: paddb %xmm0, %xmm1
24844 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24845 ; SSE3-NEXT: pxor %xmm0, %xmm0
24846 ; SSE3-NEXT: psadbw %xmm1, %xmm0
24847 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24848 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28]
24849 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
24852 ; SSSE3-LABEL: ult_28_v2i64:
24854 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24855 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
24856 ; SSSE3-NEXT: pand %xmm1, %xmm2
24857 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24858 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
24859 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
24860 ; SSSE3-NEXT: psrlw $4, %xmm0
24861 ; SSSE3-NEXT: pand %xmm1, %xmm0
24862 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
24863 ; SSSE3-NEXT: paddb %xmm4, %xmm3
24864 ; SSSE3-NEXT: pxor %xmm0, %xmm0
24865 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
24866 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24867 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [28,28,28,28]
24868 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
24871 ; SSE41-LABEL: ult_28_v2i64:
24873 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24874 ; SSE41-NEXT: movdqa %xmm0, %xmm2
24875 ; SSE41-NEXT: pand %xmm1, %xmm2
24876 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24877 ; SSE41-NEXT: movdqa %xmm3, %xmm4
24878 ; SSE41-NEXT: pshufb %xmm2, %xmm4
24879 ; SSE41-NEXT: psrlw $4, %xmm0
24880 ; SSE41-NEXT: pand %xmm1, %xmm0
24881 ; SSE41-NEXT: pshufb %xmm0, %xmm3
24882 ; SSE41-NEXT: paddb %xmm4, %xmm3
24883 ; SSE41-NEXT: pxor %xmm0, %xmm0
24884 ; SSE41-NEXT: psadbw %xmm3, %xmm0
24885 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
24886 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [28,28,28,28]
24887 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
24890 ; AVX1-LABEL: ult_28_v2i64:
24892 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24893 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
24894 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24895 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24896 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
24897 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
24898 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24899 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24900 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
24901 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24902 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [28,28]
24903 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24906 ; AVX2-LABEL: ult_28_v2i64:
24908 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
24909 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
24910 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
24911 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
24912 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
24913 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
24914 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
24915 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
24916 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
24917 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24918 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [28,28]
24919 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24922 ; AVX512VPOPCNTDQ-LABEL: ult_28_v2i64:
24923 ; AVX512VPOPCNTDQ: # %bb.0:
24924 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24925 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
24926 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [28,28]
24927 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24928 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
24929 ; AVX512VPOPCNTDQ-NEXT: retq
24931 ; AVX512VPOPCNTDQVL-LABEL: ult_28_v2i64:
24932 ; AVX512VPOPCNTDQVL: # %bb.0:
24933 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
24934 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28]
24935 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24936 ; AVX512VPOPCNTDQVL-NEXT: retq
24938 ; BITALG_NOVLX-LABEL: ult_28_v2i64:
24939 ; BITALG_NOVLX: # %bb.0:
24940 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
24941 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
24942 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
24943 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24944 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [28,28]
24945 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24946 ; BITALG_NOVLX-NEXT: vzeroupper
24947 ; BITALG_NOVLX-NEXT: retq
24949 ; BITALG-LABEL: ult_28_v2i64:
24951 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
24952 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
24953 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
24954 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28]
24955 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
24956 ; BITALG-NEXT: retq
24957 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
24958 %3 = icmp ult <2 x i64> %2, <i64 28, i64 28>
24959 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_28_v2i64: for each 64-bit lane of %0, tests ctpop(lane) u> 28 and
; sign-extends the i1 result to i64 (all-ones when true, zero when false).
; The assertion lines inside this function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script instead of editing them by hand.
24963 define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) {
24964 ; SSE2-LABEL: ugt_28_v2i64:
24966 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24967 ; SSE2-NEXT: psrlw $1, %xmm1
24968 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24969 ; SSE2-NEXT: psubb %xmm1, %xmm0
24970 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24971 ; SSE2-NEXT: movdqa %xmm0, %xmm2
24972 ; SSE2-NEXT: pand %xmm1, %xmm2
24973 ; SSE2-NEXT: psrlw $2, %xmm0
24974 ; SSE2-NEXT: pand %xmm1, %xmm0
24975 ; SSE2-NEXT: paddb %xmm2, %xmm0
24976 ; SSE2-NEXT: movdqa %xmm0, %xmm1
24977 ; SSE2-NEXT: psrlw $4, %xmm1
24978 ; SSE2-NEXT: paddb %xmm0, %xmm1
24979 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24980 ; SSE2-NEXT: pxor %xmm0, %xmm0
24981 ; SSE2-NEXT: psadbw %xmm1, %xmm0
24982 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
24983 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
24986 ; SSE3-LABEL: ugt_28_v2i64:
24988 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24989 ; SSE3-NEXT: psrlw $1, %xmm1
24990 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
24991 ; SSE3-NEXT: psubb %xmm1, %xmm0
24992 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
24993 ; SSE3-NEXT: movdqa %xmm0, %xmm2
24994 ; SSE3-NEXT: pand %xmm1, %xmm2
24995 ; SSE3-NEXT: psrlw $2, %xmm0
24996 ; SSE3-NEXT: pand %xmm1, %xmm0
24997 ; SSE3-NEXT: paddb %xmm2, %xmm0
24998 ; SSE3-NEXT: movdqa %xmm0, %xmm1
24999 ; SSE3-NEXT: psrlw $4, %xmm1
25000 ; SSE3-NEXT: paddb %xmm0, %xmm1
25001 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25002 ; SSE3-NEXT: pxor %xmm0, %xmm0
25003 ; SSE3-NEXT: psadbw %xmm1, %xmm0
25004 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
25005 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25008 ; SSSE3-LABEL: ugt_28_v2i64:
25010 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25011 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
25012 ; SSSE3-NEXT: pand %xmm1, %xmm2
25013 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25014 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
25015 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
25016 ; SSSE3-NEXT: psrlw $4, %xmm0
25017 ; SSSE3-NEXT: pand %xmm1, %xmm0
25018 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
25019 ; SSSE3-NEXT: paddb %xmm4, %xmm3
25020 ; SSSE3-NEXT: pxor %xmm0, %xmm0
25021 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
25022 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
25023 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25026 ; SSE41-LABEL: ugt_28_v2i64:
25028 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25029 ; SSE41-NEXT: movdqa %xmm0, %xmm2
25030 ; SSE41-NEXT: pand %xmm1, %xmm2
25031 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25032 ; SSE41-NEXT: movdqa %xmm3, %xmm4
25033 ; SSE41-NEXT: pshufb %xmm2, %xmm4
25034 ; SSE41-NEXT: psrlw $4, %xmm0
25035 ; SSE41-NEXT: pand %xmm1, %xmm0
25036 ; SSE41-NEXT: pshufb %xmm0, %xmm3
25037 ; SSE41-NEXT: paddb %xmm4, %xmm3
25038 ; SSE41-NEXT: pxor %xmm0, %xmm0
25039 ; SSE41-NEXT: psadbw %xmm3, %xmm0
25040 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
25041 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25044 ; AVX1-LABEL: ugt_28_v2i64:
25046 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25047 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
25048 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25049 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25050 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
25051 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
25052 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25053 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25054 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
25055 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25056 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25059 ; AVX2-LABEL: ugt_28_v2i64:
25061 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25062 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
25063 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25064 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25065 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
25066 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
25067 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25068 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25069 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
25070 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25071 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25074 ; AVX512VPOPCNTDQ-LABEL: ugt_28_v2i64:
25075 ; AVX512VPOPCNTDQ: # %bb.0:
25076 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25077 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
25078 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25079 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
25080 ; AVX512VPOPCNTDQ-NEXT: retq
25082 ; AVX512VPOPCNTDQVL-LABEL: ugt_28_v2i64:
25083 ; AVX512VPOPCNTDQVL: # %bb.0:
25084 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
25085 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28]
25086 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
25087 ; AVX512VPOPCNTDQVL-NEXT: retq
25089 ; BITALG_NOVLX-LABEL: ugt_28_v2i64:
25090 ; BITALG_NOVLX: # %bb.0:
25091 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25092 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
25093 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
25094 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25095 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25096 ; BITALG_NOVLX-NEXT: vzeroupper
25097 ; BITALG_NOVLX-NEXT: retq
25099 ; BITALG-LABEL: ugt_28_v2i64:
25101 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
25102 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
25103 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25104 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [28,28]
25105 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
25106 ; BITALG-NEXT: retq
25107 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
25108 %3 = icmp ugt <2 x i64> %2, <i64 28, i64 28>
25109 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_29_v2i64: for each 64-bit lane of %0, tests ctpop(lane) u< 29 and
; sign-extends the i1 result to i64 (all-ones when true, zero when false).
; The assertion lines inside this function are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script instead of editing them by hand.
25113 define <2 x i64> @ult_29_v2i64(<2 x i64> %0) {
25114 ; SSE2-LABEL: ult_29_v2i64:
25116 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25117 ; SSE2-NEXT: psrlw $1, %xmm1
25118 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25119 ; SSE2-NEXT: psubb %xmm1, %xmm0
25120 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25121 ; SSE2-NEXT: movdqa %xmm0, %xmm2
25122 ; SSE2-NEXT: pand %xmm1, %xmm2
25123 ; SSE2-NEXT: psrlw $2, %xmm0
25124 ; SSE2-NEXT: pand %xmm1, %xmm0
25125 ; SSE2-NEXT: paddb %xmm2, %xmm0
25126 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25127 ; SSE2-NEXT: psrlw $4, %xmm1
25128 ; SSE2-NEXT: paddb %xmm0, %xmm1
25129 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25130 ; SSE2-NEXT: pxor %xmm0, %xmm0
25131 ; SSE2-NEXT: psadbw %xmm1, %xmm0
25132 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25133 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29]
25134 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
25137 ; SSE3-LABEL: ult_29_v2i64:
25139 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25140 ; SSE3-NEXT: psrlw $1, %xmm1
25141 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25142 ; SSE3-NEXT: psubb %xmm1, %xmm0
25143 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25144 ; SSE3-NEXT: movdqa %xmm0, %xmm2
25145 ; SSE3-NEXT: pand %xmm1, %xmm2
25146 ; SSE3-NEXT: psrlw $2, %xmm0
25147 ; SSE3-NEXT: pand %xmm1, %xmm0
25148 ; SSE3-NEXT: paddb %xmm2, %xmm0
25149 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25150 ; SSE3-NEXT: psrlw $4, %xmm1
25151 ; SSE3-NEXT: paddb %xmm0, %xmm1
25152 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25153 ; SSE3-NEXT: pxor %xmm0, %xmm0
25154 ; SSE3-NEXT: psadbw %xmm1, %xmm0
25155 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25156 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29]
25157 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
25160 ; SSSE3-LABEL: ult_29_v2i64:
25162 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25163 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
25164 ; SSSE3-NEXT: pand %xmm1, %xmm2
25165 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25166 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
25167 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
25168 ; SSSE3-NEXT: psrlw $4, %xmm0
25169 ; SSSE3-NEXT: pand %xmm1, %xmm0
25170 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
25171 ; SSSE3-NEXT: paddb %xmm4, %xmm3
25172 ; SSSE3-NEXT: pxor %xmm0, %xmm0
25173 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
25174 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25175 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [29,29,29,29]
25176 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
25179 ; SSE41-LABEL: ult_29_v2i64:
25181 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25182 ; SSE41-NEXT: movdqa %xmm0, %xmm2
25183 ; SSE41-NEXT: pand %xmm1, %xmm2
25184 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25185 ; SSE41-NEXT: movdqa %xmm3, %xmm4
25186 ; SSE41-NEXT: pshufb %xmm2, %xmm4
25187 ; SSE41-NEXT: psrlw $4, %xmm0
25188 ; SSE41-NEXT: pand %xmm1, %xmm0
25189 ; SSE41-NEXT: pshufb %xmm0, %xmm3
25190 ; SSE41-NEXT: paddb %xmm4, %xmm3
25191 ; SSE41-NEXT: pxor %xmm0, %xmm0
25192 ; SSE41-NEXT: psadbw %xmm3, %xmm0
25193 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25194 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [29,29,29,29]
25195 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
25198 ; AVX1-LABEL: ult_29_v2i64:
25200 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25201 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
25202 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25203 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25204 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
25205 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
25206 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25207 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25208 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
25209 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25210 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [29,29]
25211 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25214 ; AVX2-LABEL: ult_29_v2i64:
25216 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25217 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
25218 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25219 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25220 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
25221 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
25222 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25223 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25224 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
25225 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25226 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [29,29]
25227 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25230 ; AVX512VPOPCNTDQ-LABEL: ult_29_v2i64:
25231 ; AVX512VPOPCNTDQ: # %bb.0:
25232 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25233 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
25234 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [29,29]
25235 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25236 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
25237 ; AVX512VPOPCNTDQ-NEXT: retq
25239 ; AVX512VPOPCNTDQVL-LABEL: ult_29_v2i64:
25240 ; AVX512VPOPCNTDQVL: # %bb.0:
25241 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
25242 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29]
25243 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25244 ; AVX512VPOPCNTDQVL-NEXT: retq
25246 ; BITALG_NOVLX-LABEL: ult_29_v2i64:
25247 ; BITALG_NOVLX: # %bb.0:
25248 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25249 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
25250 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
25251 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25252 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [29,29]
25253 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25254 ; BITALG_NOVLX-NEXT: vzeroupper
25255 ; BITALG_NOVLX-NEXT: retq
25257 ; BITALG-LABEL: ult_29_v2i64:
25259 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
25260 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
25261 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25262 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29]
25263 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25264 ; BITALG-NEXT: retq
25265 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
25266 %3 = icmp ult <2 x i64> %2, <i64 29, i64 29>
25267 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test - population count of each i64 lane of a <2 x i64> vector,
; compared "unsigned greater than 29"; the <2 x i1> result is sign-extended
; to a <2 x i64> mask (%4).
; The prefixed comment lines below are the expected llc output for each RUN
; configuration listed at the top of the file. They are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
; As those lines show, the SSE2 and SSE3 runs expect a shift/mask/add
; bit-count sequence, the SSSE3 through AVX2 runs expect a pshufb nibble
; lookup table, and both finish with psadbw against zero to sum the byte
; counts of each 64-bit lane. The VPOPCNTDQ runs expect vpopcntq and the
; BITALG runs expect vpopcntb followed by vpsadbw.
; The final compare is "count > constant"; the pre-AVX runs do it with
; pcmpgtd after pshufd copies the low dword of each lane into both halves.
25271 define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) {
25272 ; SSE2-LABEL: ugt_29_v2i64:
25274 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25275 ; SSE2-NEXT: psrlw $1, %xmm1
25276 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25277 ; SSE2-NEXT: psubb %xmm1, %xmm0
25278 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25279 ; SSE2-NEXT: movdqa %xmm0, %xmm2
25280 ; SSE2-NEXT: pand %xmm1, %xmm2
25281 ; SSE2-NEXT: psrlw $2, %xmm0
25282 ; SSE2-NEXT: pand %xmm1, %xmm0
25283 ; SSE2-NEXT: paddb %xmm2, %xmm0
25284 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25285 ; SSE2-NEXT: psrlw $4, %xmm1
25286 ; SSE2-NEXT: paddb %xmm0, %xmm1
25287 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25288 ; SSE2-NEXT: pxor %xmm0, %xmm0
25289 ; SSE2-NEXT: psadbw %xmm1, %xmm0
25290 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
25291 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25294 ; SSE3-LABEL: ugt_29_v2i64:
25296 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25297 ; SSE3-NEXT: psrlw $1, %xmm1
25298 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25299 ; SSE3-NEXT: psubb %xmm1, %xmm0
25300 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25301 ; SSE3-NEXT: movdqa %xmm0, %xmm2
25302 ; SSE3-NEXT: pand %xmm1, %xmm2
25303 ; SSE3-NEXT: psrlw $2, %xmm0
25304 ; SSE3-NEXT: pand %xmm1, %xmm0
25305 ; SSE3-NEXT: paddb %xmm2, %xmm0
25306 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25307 ; SSE3-NEXT: psrlw $4, %xmm1
25308 ; SSE3-NEXT: paddb %xmm0, %xmm1
25309 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25310 ; SSE3-NEXT: pxor %xmm0, %xmm0
25311 ; SSE3-NEXT: psadbw %xmm1, %xmm0
25312 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
25313 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25316 ; SSSE3-LABEL: ugt_29_v2i64:
25318 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25319 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
25320 ; SSSE3-NEXT: pand %xmm1, %xmm2
25321 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25322 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
25323 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
25324 ; SSSE3-NEXT: psrlw $4, %xmm0
25325 ; SSSE3-NEXT: pand %xmm1, %xmm0
25326 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
25327 ; SSSE3-NEXT: paddb %xmm4, %xmm3
25328 ; SSSE3-NEXT: pxor %xmm0, %xmm0
25329 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
25330 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
25331 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25334 ; SSE41-LABEL: ugt_29_v2i64:
25336 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25337 ; SSE41-NEXT: movdqa %xmm0, %xmm2
25338 ; SSE41-NEXT: pand %xmm1, %xmm2
25339 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25340 ; SSE41-NEXT: movdqa %xmm3, %xmm4
25341 ; SSE41-NEXT: pshufb %xmm2, %xmm4
25342 ; SSE41-NEXT: psrlw $4, %xmm0
25343 ; SSE41-NEXT: pand %xmm1, %xmm0
25344 ; SSE41-NEXT: pshufb %xmm0, %xmm3
25345 ; SSE41-NEXT: paddb %xmm4, %xmm3
25346 ; SSE41-NEXT: pxor %xmm0, %xmm0
25347 ; SSE41-NEXT: psadbw %xmm3, %xmm0
25348 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
25349 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25352 ; AVX1-LABEL: ugt_29_v2i64:
25354 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25355 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
25356 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25357 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25358 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
25359 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
25360 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25361 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25362 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
25363 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25364 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25367 ; AVX2-LABEL: ugt_29_v2i64:
25369 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25370 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
25371 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25372 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25373 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
25374 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
25375 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25376 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25377 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
25378 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25379 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25382 ; AVX512VPOPCNTDQ-LABEL: ugt_29_v2i64:
25383 ; AVX512VPOPCNTDQ: # %bb.0:
25384 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25385 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
25386 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25387 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
25388 ; AVX512VPOPCNTDQ-NEXT: retq
25390 ; AVX512VPOPCNTDQVL-LABEL: ugt_29_v2i64:
25391 ; AVX512VPOPCNTDQVL: # %bb.0:
25392 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
25393 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29]
25394 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
25395 ; AVX512VPOPCNTDQVL-NEXT: retq
25397 ; BITALG_NOVLX-LABEL: ugt_29_v2i64:
25398 ; BITALG_NOVLX: # %bb.0:
25399 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25400 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
25401 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
25402 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25403 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25404 ; BITALG_NOVLX-NEXT: vzeroupper
25405 ; BITALG_NOVLX-NEXT: retq
25407 ; BITALG-LABEL: ugt_29_v2i64:
25409 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
25410 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
25411 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25412 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [29,29]
25413 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
25414 ; BITALG-NEXT: retq
; IR under test - ctpop per lane, unsigned "greater than" against splat 29,
; then sext so each true lane becomes all-ones and each false lane zero.
25415 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
25416 %3 = icmp ugt <2 x i64> %2, <i64 29, i64 29>
25417 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test - population count of each i64 lane of a <2 x i64> vector,
; compared "unsigned less than 30"; the <2 x i1> result is sign-extended
; to a <2 x i64> mask (%4).
; The prefixed comment lines below are the expected llc output for each RUN
; configuration listed at the top of the file. They are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
; As those lines show, the SSE2 and SSE3 runs expect a shift/mask/add
; bit-count sequence, the SSSE3 through AVX2 runs expect a pshufb nibble
; lookup table, and both finish with psadbw against zero to sum the byte
; counts of each 64-bit lane. The VPOPCNTDQ runs expect vpopcntq and the
; BITALG runs expect vpopcntb followed by vpsadbw.
; The "less than" is expected as a greater-than compare with the operands
; swapped - the splat constant 30 is materialized in a register and tested
; as "constant > count".
25421 define <2 x i64> @ult_30_v2i64(<2 x i64> %0) {
25422 ; SSE2-LABEL: ult_30_v2i64:
25424 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25425 ; SSE2-NEXT: psrlw $1, %xmm1
25426 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25427 ; SSE2-NEXT: psubb %xmm1, %xmm0
25428 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25429 ; SSE2-NEXT: movdqa %xmm0, %xmm2
25430 ; SSE2-NEXT: pand %xmm1, %xmm2
25431 ; SSE2-NEXT: psrlw $2, %xmm0
25432 ; SSE2-NEXT: pand %xmm1, %xmm0
25433 ; SSE2-NEXT: paddb %xmm2, %xmm0
25434 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25435 ; SSE2-NEXT: psrlw $4, %xmm1
25436 ; SSE2-NEXT: paddb %xmm0, %xmm1
25437 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25438 ; SSE2-NEXT: pxor %xmm0, %xmm0
25439 ; SSE2-NEXT: psadbw %xmm1, %xmm0
25440 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25441 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30]
25442 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
25445 ; SSE3-LABEL: ult_30_v2i64:
25447 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25448 ; SSE3-NEXT: psrlw $1, %xmm1
25449 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25450 ; SSE3-NEXT: psubb %xmm1, %xmm0
25451 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25452 ; SSE3-NEXT: movdqa %xmm0, %xmm2
25453 ; SSE3-NEXT: pand %xmm1, %xmm2
25454 ; SSE3-NEXT: psrlw $2, %xmm0
25455 ; SSE3-NEXT: pand %xmm1, %xmm0
25456 ; SSE3-NEXT: paddb %xmm2, %xmm0
25457 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25458 ; SSE3-NEXT: psrlw $4, %xmm1
25459 ; SSE3-NEXT: paddb %xmm0, %xmm1
25460 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25461 ; SSE3-NEXT: pxor %xmm0, %xmm0
25462 ; SSE3-NEXT: psadbw %xmm1, %xmm0
25463 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25464 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30]
25465 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
25468 ; SSSE3-LABEL: ult_30_v2i64:
25470 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25471 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
25472 ; SSSE3-NEXT: pand %xmm1, %xmm2
25473 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25474 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
25475 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
25476 ; SSSE3-NEXT: psrlw $4, %xmm0
25477 ; SSSE3-NEXT: pand %xmm1, %xmm0
25478 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
25479 ; SSSE3-NEXT: paddb %xmm4, %xmm3
25480 ; SSSE3-NEXT: pxor %xmm0, %xmm0
25481 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
25482 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25483 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [30,30,30,30]
25484 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
25487 ; SSE41-LABEL: ult_30_v2i64:
25489 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25490 ; SSE41-NEXT: movdqa %xmm0, %xmm2
25491 ; SSE41-NEXT: pand %xmm1, %xmm2
25492 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25493 ; SSE41-NEXT: movdqa %xmm3, %xmm4
25494 ; SSE41-NEXT: pshufb %xmm2, %xmm4
25495 ; SSE41-NEXT: psrlw $4, %xmm0
25496 ; SSE41-NEXT: pand %xmm1, %xmm0
25497 ; SSE41-NEXT: pshufb %xmm0, %xmm3
25498 ; SSE41-NEXT: paddb %xmm4, %xmm3
25499 ; SSE41-NEXT: pxor %xmm0, %xmm0
25500 ; SSE41-NEXT: psadbw %xmm3, %xmm0
25501 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25502 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [30,30,30,30]
25503 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
25506 ; AVX1-LABEL: ult_30_v2i64:
25508 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25509 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
25510 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25511 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25512 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
25513 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
25514 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25515 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25516 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
25517 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25518 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [30,30]
25519 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25522 ; AVX2-LABEL: ult_30_v2i64:
25524 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25525 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
25526 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25527 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25528 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
25529 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
25530 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25531 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25532 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
25533 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25534 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [30,30]
25535 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25538 ; AVX512VPOPCNTDQ-LABEL: ult_30_v2i64:
25539 ; AVX512VPOPCNTDQ: # %bb.0:
25540 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25541 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
25542 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [30,30]
25543 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25544 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
25545 ; AVX512VPOPCNTDQ-NEXT: retq
25547 ; AVX512VPOPCNTDQVL-LABEL: ult_30_v2i64:
25548 ; AVX512VPOPCNTDQVL: # %bb.0:
25549 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
25550 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30]
25551 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25552 ; AVX512VPOPCNTDQVL-NEXT: retq
25554 ; BITALG_NOVLX-LABEL: ult_30_v2i64:
25555 ; BITALG_NOVLX: # %bb.0:
25556 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25557 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
25558 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
25559 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25560 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [30,30]
25561 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25562 ; BITALG_NOVLX-NEXT: vzeroupper
25563 ; BITALG_NOVLX-NEXT: retq
25565 ; BITALG-LABEL: ult_30_v2i64:
25567 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
25568 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
25569 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25570 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30]
25571 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25572 ; BITALG-NEXT: retq
; IR under test - ctpop per lane, unsigned "less than" against splat 30,
; then sext so each true lane becomes all-ones and each false lane zero.
25573 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
25574 %3 = icmp ult <2 x i64> %2, <i64 30, i64 30>
25575 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test - population count of each i64 lane of a <2 x i64> vector,
; compared "unsigned greater than 30"; the <2 x i1> result is sign-extended
; to a <2 x i64> mask (%4).
; The prefixed comment lines below are the expected llc output for each RUN
; configuration listed at the top of the file. They are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
; As those lines show, the SSE2 and SSE3 runs expect a shift/mask/add
; bit-count sequence, the SSSE3 through AVX2 runs expect a pshufb nibble
; lookup table, and both finish with psadbw against zero to sum the byte
; counts of each 64-bit lane. The VPOPCNTDQ runs expect vpopcntq and the
; BITALG runs expect vpopcntb followed by vpsadbw.
; The final compare is "count > constant"; the pre-AVX runs do it with
; pcmpgtd after pshufd copies the low dword of each lane into both halves.
25579 define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) {
25580 ; SSE2-LABEL: ugt_30_v2i64:
25582 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25583 ; SSE2-NEXT: psrlw $1, %xmm1
25584 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25585 ; SSE2-NEXT: psubb %xmm1, %xmm0
25586 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25587 ; SSE2-NEXT: movdqa %xmm0, %xmm2
25588 ; SSE2-NEXT: pand %xmm1, %xmm2
25589 ; SSE2-NEXT: psrlw $2, %xmm0
25590 ; SSE2-NEXT: pand %xmm1, %xmm0
25591 ; SSE2-NEXT: paddb %xmm2, %xmm0
25592 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25593 ; SSE2-NEXT: psrlw $4, %xmm1
25594 ; SSE2-NEXT: paddb %xmm0, %xmm1
25595 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25596 ; SSE2-NEXT: pxor %xmm0, %xmm0
25597 ; SSE2-NEXT: psadbw %xmm1, %xmm0
25598 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
25599 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25602 ; SSE3-LABEL: ugt_30_v2i64:
25604 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25605 ; SSE3-NEXT: psrlw $1, %xmm1
25606 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25607 ; SSE3-NEXT: psubb %xmm1, %xmm0
25608 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25609 ; SSE3-NEXT: movdqa %xmm0, %xmm2
25610 ; SSE3-NEXT: pand %xmm1, %xmm2
25611 ; SSE3-NEXT: psrlw $2, %xmm0
25612 ; SSE3-NEXT: pand %xmm1, %xmm0
25613 ; SSE3-NEXT: paddb %xmm2, %xmm0
25614 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25615 ; SSE3-NEXT: psrlw $4, %xmm1
25616 ; SSE3-NEXT: paddb %xmm0, %xmm1
25617 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25618 ; SSE3-NEXT: pxor %xmm0, %xmm0
25619 ; SSE3-NEXT: psadbw %xmm1, %xmm0
25620 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
25621 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25624 ; SSSE3-LABEL: ugt_30_v2i64:
25626 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25627 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
25628 ; SSSE3-NEXT: pand %xmm1, %xmm2
25629 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25630 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
25631 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
25632 ; SSSE3-NEXT: psrlw $4, %xmm0
25633 ; SSSE3-NEXT: pand %xmm1, %xmm0
25634 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
25635 ; SSSE3-NEXT: paddb %xmm4, %xmm3
25636 ; SSSE3-NEXT: pxor %xmm0, %xmm0
25637 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
25638 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
25639 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25642 ; SSE41-LABEL: ugt_30_v2i64:
25644 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25645 ; SSE41-NEXT: movdqa %xmm0, %xmm2
25646 ; SSE41-NEXT: pand %xmm1, %xmm2
25647 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25648 ; SSE41-NEXT: movdqa %xmm3, %xmm4
25649 ; SSE41-NEXT: pshufb %xmm2, %xmm4
25650 ; SSE41-NEXT: psrlw $4, %xmm0
25651 ; SSE41-NEXT: pand %xmm1, %xmm0
25652 ; SSE41-NEXT: pshufb %xmm0, %xmm3
25653 ; SSE41-NEXT: paddb %xmm4, %xmm3
25654 ; SSE41-NEXT: pxor %xmm0, %xmm0
25655 ; SSE41-NEXT: psadbw %xmm3, %xmm0
25656 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
25657 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25660 ; AVX1-LABEL: ugt_30_v2i64:
25662 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25663 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
25664 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25665 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25666 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
25667 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
25668 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25669 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25670 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
25671 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25672 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25675 ; AVX2-LABEL: ugt_30_v2i64:
25677 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25678 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
25679 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25680 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25681 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
25682 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
25683 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25684 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25685 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
25686 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25687 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25690 ; AVX512VPOPCNTDQ-LABEL: ugt_30_v2i64:
25691 ; AVX512VPOPCNTDQ: # %bb.0:
25692 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25693 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
25694 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25695 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
25696 ; AVX512VPOPCNTDQ-NEXT: retq
25698 ; AVX512VPOPCNTDQVL-LABEL: ugt_30_v2i64:
25699 ; AVX512VPOPCNTDQVL: # %bb.0:
25700 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
25701 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30]
25702 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
25703 ; AVX512VPOPCNTDQVL-NEXT: retq
25705 ; BITALG_NOVLX-LABEL: ugt_30_v2i64:
25706 ; BITALG_NOVLX: # %bb.0:
25707 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25708 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
25709 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
25710 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25711 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25712 ; BITALG_NOVLX-NEXT: vzeroupper
25713 ; BITALG_NOVLX-NEXT: retq
25715 ; BITALG-LABEL: ugt_30_v2i64:
25717 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
25718 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
25719 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25720 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [30,30]
25721 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
25722 ; BITALG-NEXT: retq
; IR under test - ctpop per lane, unsigned "greater than" against splat 30,
; then sext so each true lane becomes all-ones and each false lane zero.
25723 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
25724 %3 = icmp ugt <2 x i64> %2, <i64 30, i64 30>
25725 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test - population count of each i64 lane of a <2 x i64> vector,
; compared "unsigned less than 31"; the <2 x i1> result is sign-extended
; to a <2 x i64> mask (%4).
; The prefixed comment lines below are the expected llc output for each RUN
; configuration listed at the top of the file. They are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
; As those lines show, the SSE2 and SSE3 runs expect a shift/mask/add
; bit-count sequence, the SSSE3 through AVX2 runs expect a pshufb nibble
; lookup table, and both finish with psadbw against zero to sum the byte
; counts of each 64-bit lane. The VPOPCNTDQ runs expect vpopcntq and the
; BITALG runs expect vpopcntb followed by vpsadbw.
; The "less than" is expected as a greater-than compare with the operands
; swapped - the splat constant 31 is materialized in a register and tested
; as "constant > count".
25729 define <2 x i64> @ult_31_v2i64(<2 x i64> %0) {
25730 ; SSE2-LABEL: ult_31_v2i64:
25732 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25733 ; SSE2-NEXT: psrlw $1, %xmm1
25734 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25735 ; SSE2-NEXT: psubb %xmm1, %xmm0
25736 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25737 ; SSE2-NEXT: movdqa %xmm0, %xmm2
25738 ; SSE2-NEXT: pand %xmm1, %xmm2
25739 ; SSE2-NEXT: psrlw $2, %xmm0
25740 ; SSE2-NEXT: pand %xmm1, %xmm0
25741 ; SSE2-NEXT: paddb %xmm2, %xmm0
25742 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25743 ; SSE2-NEXT: psrlw $4, %xmm1
25744 ; SSE2-NEXT: paddb %xmm0, %xmm1
25745 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25746 ; SSE2-NEXT: pxor %xmm0, %xmm0
25747 ; SSE2-NEXT: psadbw %xmm1, %xmm0
25748 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25749 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31]
25750 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
25753 ; SSE3-LABEL: ult_31_v2i64:
25755 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25756 ; SSE3-NEXT: psrlw $1, %xmm1
25757 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25758 ; SSE3-NEXT: psubb %xmm1, %xmm0
25759 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25760 ; SSE3-NEXT: movdqa %xmm0, %xmm2
25761 ; SSE3-NEXT: pand %xmm1, %xmm2
25762 ; SSE3-NEXT: psrlw $2, %xmm0
25763 ; SSE3-NEXT: pand %xmm1, %xmm0
25764 ; SSE3-NEXT: paddb %xmm2, %xmm0
25765 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25766 ; SSE3-NEXT: psrlw $4, %xmm1
25767 ; SSE3-NEXT: paddb %xmm0, %xmm1
25768 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25769 ; SSE3-NEXT: pxor %xmm0, %xmm0
25770 ; SSE3-NEXT: psadbw %xmm1, %xmm0
25771 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25772 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31]
25773 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
25776 ; SSSE3-LABEL: ult_31_v2i64:
25778 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25779 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
25780 ; SSSE3-NEXT: pand %xmm1, %xmm2
25781 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25782 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
25783 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
25784 ; SSSE3-NEXT: psrlw $4, %xmm0
25785 ; SSSE3-NEXT: pand %xmm1, %xmm0
25786 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
25787 ; SSSE3-NEXT: paddb %xmm4, %xmm3
25788 ; SSSE3-NEXT: pxor %xmm0, %xmm0
25789 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
25790 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25791 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [31,31,31,31]
25792 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
25795 ; SSE41-LABEL: ult_31_v2i64:
25797 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25798 ; SSE41-NEXT: movdqa %xmm0, %xmm2
25799 ; SSE41-NEXT: pand %xmm1, %xmm2
25800 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25801 ; SSE41-NEXT: movdqa %xmm3, %xmm4
25802 ; SSE41-NEXT: pshufb %xmm2, %xmm4
25803 ; SSE41-NEXT: psrlw $4, %xmm0
25804 ; SSE41-NEXT: pand %xmm1, %xmm0
25805 ; SSE41-NEXT: pshufb %xmm0, %xmm3
25806 ; SSE41-NEXT: paddb %xmm4, %xmm3
25807 ; SSE41-NEXT: pxor %xmm0, %xmm0
25808 ; SSE41-NEXT: psadbw %xmm3, %xmm0
25809 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
25810 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [31,31,31,31]
25811 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
25814 ; AVX1-LABEL: ult_31_v2i64:
25816 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25817 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
25818 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25819 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25820 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
25821 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
25822 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25823 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25824 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
25825 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25826 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [31,31]
25827 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25830 ; AVX2-LABEL: ult_31_v2i64:
25832 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25833 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
25834 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25835 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25836 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
25837 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
25838 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25839 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25840 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
25841 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25842 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [31,31]
25843 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25846 ; AVX512VPOPCNTDQ-LABEL: ult_31_v2i64:
25847 ; AVX512VPOPCNTDQ: # %bb.0:
25848 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25849 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
25850 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [31,31]
25851 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25852 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
25853 ; AVX512VPOPCNTDQ-NEXT: retq
25855 ; AVX512VPOPCNTDQVL-LABEL: ult_31_v2i64:
25856 ; AVX512VPOPCNTDQVL: # %bb.0:
25857 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
25858 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31]
25859 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25860 ; AVX512VPOPCNTDQVL-NEXT: retq
25862 ; BITALG_NOVLX-LABEL: ult_31_v2i64:
25863 ; BITALG_NOVLX: # %bb.0:
25864 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
25865 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
25866 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
25867 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25868 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [31,31]
25869 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25870 ; BITALG_NOVLX-NEXT: vzeroupper
25871 ; BITALG_NOVLX-NEXT: retq
25873 ; BITALG-LABEL: ult_31_v2i64:
25875 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
25876 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
25877 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25878 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31]
25879 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
25880 ; BITALG-NEXT: retq
; IR under test - ctpop per lane, unsigned "less than" against splat 31,
; then sext so each true lane becomes all-ones and each false lane zero.
25881 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
25882 %3 = icmp ult <2 x i64> %2, <i64 31, i64 31>
25883 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test - population count of each i64 lane of a <2 x i64> vector,
; compared "unsigned greater than 31"; the <2 x i1> result is sign-extended
; to a <2 x i64> mask (%4).
; The prefixed comment lines below are the expected llc output for each RUN
; configuration listed at the top of the file. They are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
; As those lines show, the SSE2 and SSE3 runs expect a shift/mask/add
; bit-count sequence, the SSSE3 through AVX2 runs expect a pshufb nibble
; lookup table, and both finish with psadbw against zero to sum the byte
; counts of each 64-bit lane. The VPOPCNTDQ runs expect vpopcntq and the
; BITALG runs expect vpopcntb followed by vpsadbw.
; The final compare is "count > constant"; the pre-AVX runs do it with
; pcmpgtd after pshufd copies the low dword of each lane into both halves.
25887 define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) {
25888 ; SSE2-LABEL: ugt_31_v2i64:
25890 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25891 ; SSE2-NEXT: psrlw $1, %xmm1
25892 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25893 ; SSE2-NEXT: psubb %xmm1, %xmm0
25894 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25895 ; SSE2-NEXT: movdqa %xmm0, %xmm2
25896 ; SSE2-NEXT: pand %xmm1, %xmm2
25897 ; SSE2-NEXT: psrlw $2, %xmm0
25898 ; SSE2-NEXT: pand %xmm1, %xmm0
25899 ; SSE2-NEXT: paddb %xmm2, %xmm0
25900 ; SSE2-NEXT: movdqa %xmm0, %xmm1
25901 ; SSE2-NEXT: psrlw $4, %xmm1
25902 ; SSE2-NEXT: paddb %xmm0, %xmm1
25903 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25904 ; SSE2-NEXT: pxor %xmm0, %xmm0
25905 ; SSE2-NEXT: psadbw %xmm1, %xmm0
25906 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
25907 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25910 ; SSE3-LABEL: ugt_31_v2i64:
25912 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25913 ; SSE3-NEXT: psrlw $1, %xmm1
25914 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25915 ; SSE3-NEXT: psubb %xmm1, %xmm0
25916 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
25917 ; SSE3-NEXT: movdqa %xmm0, %xmm2
25918 ; SSE3-NEXT: pand %xmm1, %xmm2
25919 ; SSE3-NEXT: psrlw $2, %xmm0
25920 ; SSE3-NEXT: pand %xmm1, %xmm0
25921 ; SSE3-NEXT: paddb %xmm2, %xmm0
25922 ; SSE3-NEXT: movdqa %xmm0, %xmm1
25923 ; SSE3-NEXT: psrlw $4, %xmm1
25924 ; SSE3-NEXT: paddb %xmm0, %xmm1
25925 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
25926 ; SSE3-NEXT: pxor %xmm0, %xmm0
25927 ; SSE3-NEXT: psadbw %xmm1, %xmm0
25928 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
25929 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25932 ; SSSE3-LABEL: ugt_31_v2i64:
25934 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25935 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
25936 ; SSSE3-NEXT: pand %xmm1, %xmm2
25937 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25938 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
25939 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
25940 ; SSSE3-NEXT: psrlw $4, %xmm0
25941 ; SSSE3-NEXT: pand %xmm1, %xmm0
25942 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
25943 ; SSSE3-NEXT: paddb %xmm4, %xmm3
25944 ; SSSE3-NEXT: pxor %xmm0, %xmm0
25945 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
25946 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
25947 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25950 ; SSE41-LABEL: ugt_31_v2i64:
25952 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25953 ; SSE41-NEXT: movdqa %xmm0, %xmm2
25954 ; SSE41-NEXT: pand %xmm1, %xmm2
25955 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25956 ; SSE41-NEXT: movdqa %xmm3, %xmm4
25957 ; SSE41-NEXT: pshufb %xmm2, %xmm4
25958 ; SSE41-NEXT: psrlw $4, %xmm0
25959 ; SSE41-NEXT: pand %xmm1, %xmm0
25960 ; SSE41-NEXT: pshufb %xmm0, %xmm3
25961 ; SSE41-NEXT: paddb %xmm4, %xmm3
25962 ; SSE41-NEXT: pxor %xmm0, %xmm0
25963 ; SSE41-NEXT: psadbw %xmm3, %xmm0
25964 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
25965 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
25968 ; AVX1-LABEL: ugt_31_v2i64:
25970 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25971 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
25972 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25973 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25974 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
25975 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
25976 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25977 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25978 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
25979 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25980 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25983 ; AVX2-LABEL: ugt_31_v2i64:
25985 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
25986 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
25987 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
25988 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
25989 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
25990 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
25991 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
25992 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
25993 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
25994 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
25995 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
25998 ; AVX512VPOPCNTDQ-LABEL: ugt_31_v2i64:
25999 ; AVX512VPOPCNTDQ: # %bb.0:
26000 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26001 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
26002 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26003 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
26004 ; AVX512VPOPCNTDQ-NEXT: retq
26006 ; AVX512VPOPCNTDQVL-LABEL: ugt_31_v2i64:
26007 ; AVX512VPOPCNTDQVL: # %bb.0:
26008 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
26009 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31]
26010 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
26011 ; AVX512VPOPCNTDQVL-NEXT: retq
26013 ; BITALG_NOVLX-LABEL: ugt_31_v2i64:
26014 ; BITALG_NOVLX: # %bb.0:
26015 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26016 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
26017 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
26018 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26019 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26020 ; BITALG_NOVLX-NEXT: vzeroupper
26021 ; BITALG_NOVLX-NEXT: retq
26023 ; BITALG-LABEL: ugt_31_v2i64:
26025 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
26026 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
26027 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26028 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [31,31]
26029 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
26030 ; BITALG-NEXT: retq
; IR under test - ctpop per lane, unsigned "greater than" against splat 31,
; then sext so each true lane becomes all-ones and each false lane zero.
26031 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
26032 %3 = icmp ugt <2 x i64> %2, <i64 31, i64 31>
26033 %4 = sext <2 x i1> %3 to <2 x i64>
26037 define <2 x i64> @ult_32_v2i64(<2 x i64> %0) {
26038 ; SSE2-LABEL: ult_32_v2i64:
26040 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26041 ; SSE2-NEXT: psrlw $1, %xmm1
26042 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26043 ; SSE2-NEXT: psubb %xmm1, %xmm0
26044 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26045 ; SSE2-NEXT: movdqa %xmm0, %xmm2
26046 ; SSE2-NEXT: pand %xmm1, %xmm2
26047 ; SSE2-NEXT: psrlw $2, %xmm0
26048 ; SSE2-NEXT: pand %xmm1, %xmm0
26049 ; SSE2-NEXT: paddb %xmm2, %xmm0
26050 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26051 ; SSE2-NEXT: psrlw $4, %xmm1
26052 ; SSE2-NEXT: paddb %xmm0, %xmm1
26053 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26054 ; SSE2-NEXT: pxor %xmm0, %xmm0
26055 ; SSE2-NEXT: psadbw %xmm1, %xmm0
26056 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26057 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [32,32,32,32]
26058 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
26061 ; SSE3-LABEL: ult_32_v2i64:
26063 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26064 ; SSE3-NEXT: psrlw $1, %xmm1
26065 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26066 ; SSE3-NEXT: psubb %xmm1, %xmm0
26067 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26068 ; SSE3-NEXT: movdqa %xmm0, %xmm2
26069 ; SSE3-NEXT: pand %xmm1, %xmm2
26070 ; SSE3-NEXT: psrlw $2, %xmm0
26071 ; SSE3-NEXT: pand %xmm1, %xmm0
26072 ; SSE3-NEXT: paddb %xmm2, %xmm0
26073 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26074 ; SSE3-NEXT: psrlw $4, %xmm1
26075 ; SSE3-NEXT: paddb %xmm0, %xmm1
26076 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26077 ; SSE3-NEXT: pxor %xmm0, %xmm0
26078 ; SSE3-NEXT: psadbw %xmm1, %xmm0
26079 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26080 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [32,32,32,32]
26081 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
26084 ; SSSE3-LABEL: ult_32_v2i64:
26086 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26087 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
26088 ; SSSE3-NEXT: pand %xmm1, %xmm2
26089 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26090 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
26091 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
26092 ; SSSE3-NEXT: psrlw $4, %xmm0
26093 ; SSSE3-NEXT: pand %xmm1, %xmm0
26094 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
26095 ; SSSE3-NEXT: paddb %xmm4, %xmm3
26096 ; SSSE3-NEXT: pxor %xmm0, %xmm0
26097 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
26098 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26099 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [32,32,32,32]
26100 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
26103 ; SSE41-LABEL: ult_32_v2i64:
26105 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26106 ; SSE41-NEXT: movdqa %xmm0, %xmm2
26107 ; SSE41-NEXT: pand %xmm1, %xmm2
26108 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26109 ; SSE41-NEXT: movdqa %xmm3, %xmm4
26110 ; SSE41-NEXT: pshufb %xmm2, %xmm4
26111 ; SSE41-NEXT: psrlw $4, %xmm0
26112 ; SSE41-NEXT: pand %xmm1, %xmm0
26113 ; SSE41-NEXT: pshufb %xmm0, %xmm3
26114 ; SSE41-NEXT: paddb %xmm4, %xmm3
26115 ; SSE41-NEXT: pxor %xmm0, %xmm0
26116 ; SSE41-NEXT: psadbw %xmm3, %xmm0
26117 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26118 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [32,32,32,32]
26119 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
26122 ; AVX1-LABEL: ult_32_v2i64:
26124 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26125 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
26126 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26127 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26128 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
26129 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
26130 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26131 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26132 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
26133 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26134 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [32,32]
26135 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26138 ; AVX2-LABEL: ult_32_v2i64:
26140 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26141 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
26142 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26143 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26144 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
26145 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
26146 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26147 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26148 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
26149 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26150 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [32,32]
26151 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26154 ; AVX512VPOPCNTDQ-LABEL: ult_32_v2i64:
26155 ; AVX512VPOPCNTDQ: # %bb.0:
26156 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26157 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
26158 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [32,32]
26159 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26160 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
26161 ; AVX512VPOPCNTDQ-NEXT: retq
26163 ; AVX512VPOPCNTDQVL-LABEL: ult_32_v2i64:
26164 ; AVX512VPOPCNTDQVL: # %bb.0:
26165 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
26166 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32]
26167 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26168 ; AVX512VPOPCNTDQVL-NEXT: retq
26170 ; BITALG_NOVLX-LABEL: ult_32_v2i64:
26171 ; BITALG_NOVLX: # %bb.0:
26172 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26173 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
26174 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
26175 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26176 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [32,32]
26177 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26178 ; BITALG_NOVLX-NEXT: vzeroupper
26179 ; BITALG_NOVLX-NEXT: retq
26181 ; BITALG-LABEL: ult_32_v2i64:
26183 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
26184 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
26185 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26186 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32]
26187 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26188 ; BITALG-NEXT: retq
26189 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
26190 %3 = icmp ult <2 x i64> %2, <i64 32, i64 32>
26191 %4 = sext <2 x i1> %3 to <2 x i64>
; Test case: for each i64 lane of %0, is the population count unsigned-greater-than 32?
; The IR at the bottom of this block calls @llvm.ctpop.v2i64, applies `icmp ugt`
; against a splat of 32, and sign-extends the <2 x i1> result to <2 x i64>.
; The prefixed expectation lines in between are autogenerated (the NOTE on the
; first line of the file names utils/update_llc_test_checks.py); regenerate them
; with that script instead of editing them by hand.
; Lowerings visible in the expectations: the SSE2 and SSE3 sequences count bits with
; shift/mask/add steps, the SSSE3 through AVX2 sequences use pshufb on a 16-entry
; table, and the AVX512 sequences use vpopcntq or vpopcntb.
26195 define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) {
26196 ; SSE2-LABEL: ugt_32_v2i64:
26198 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26199 ; SSE2-NEXT: psrlw $1, %xmm1
26200 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26201 ; SSE2-NEXT: psubb %xmm1, %xmm0
26202 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26203 ; SSE2-NEXT: movdqa %xmm0, %xmm2
26204 ; SSE2-NEXT: pand %xmm1, %xmm2
26205 ; SSE2-NEXT: psrlw $2, %xmm0
26206 ; SSE2-NEXT: pand %xmm1, %xmm0
26207 ; SSE2-NEXT: paddb %xmm2, %xmm0
26208 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26209 ; SSE2-NEXT: psrlw $4, %xmm1
26210 ; SSE2-NEXT: paddb %xmm0, %xmm1
26211 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26212 ; SSE2-NEXT: pxor %xmm0, %xmm0
26213 ; SSE2-NEXT: psadbw %xmm1, %xmm0
26214 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
26215 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
26218 ; SSE3-LABEL: ugt_32_v2i64:
26220 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26221 ; SSE3-NEXT: psrlw $1, %xmm1
26222 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26223 ; SSE3-NEXT: psubb %xmm1, %xmm0
26224 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26225 ; SSE3-NEXT: movdqa %xmm0, %xmm2
26226 ; SSE3-NEXT: pand %xmm1, %xmm2
26227 ; SSE3-NEXT: psrlw $2, %xmm0
26228 ; SSE3-NEXT: pand %xmm1, %xmm0
26229 ; SSE3-NEXT: paddb %xmm2, %xmm0
26230 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26231 ; SSE3-NEXT: psrlw $4, %xmm1
26232 ; SSE3-NEXT: paddb %xmm0, %xmm1
26233 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26234 ; SSE3-NEXT: pxor %xmm0, %xmm0
26235 ; SSE3-NEXT: psadbw %xmm1, %xmm0
26236 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
26237 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
26240 ; SSSE3-LABEL: ugt_32_v2i64:
26242 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26243 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
26244 ; SSSE3-NEXT: pand %xmm1, %xmm2
26245 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26246 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
26247 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
26248 ; SSSE3-NEXT: psrlw $4, %xmm0
26249 ; SSSE3-NEXT: pand %xmm1, %xmm0
26250 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
26251 ; SSSE3-NEXT: paddb %xmm4, %xmm3
26252 ; SSSE3-NEXT: pxor %xmm0, %xmm0
26253 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
26254 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
26255 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
26258 ; SSE41-LABEL: ugt_32_v2i64:
26260 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26261 ; SSE41-NEXT: movdqa %xmm0, %xmm2
26262 ; SSE41-NEXT: pand %xmm1, %xmm2
26263 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26264 ; SSE41-NEXT: movdqa %xmm3, %xmm4
26265 ; SSE41-NEXT: pshufb %xmm2, %xmm4
26266 ; SSE41-NEXT: psrlw $4, %xmm0
26267 ; SSE41-NEXT: pand %xmm1, %xmm0
26268 ; SSE41-NEXT: pshufb %xmm0, %xmm3
26269 ; SSE41-NEXT: paddb %xmm4, %xmm3
26270 ; SSE41-NEXT: pxor %xmm0, %xmm0
26271 ; SSE41-NEXT: psadbw %xmm3, %xmm0
26272 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
26273 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
26276 ; AVX1-LABEL: ugt_32_v2i64:
26278 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26279 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
26280 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26281 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26282 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
26283 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
26284 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26285 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26286 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
26287 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26288 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26291 ; AVX2-LABEL: ugt_32_v2i64:
26293 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26294 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
26295 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26296 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26297 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
26298 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
26299 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26300 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26301 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
26302 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26303 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26306 ; AVX512VPOPCNTDQ-LABEL: ugt_32_v2i64:
26307 ; AVX512VPOPCNTDQ: # %bb.0:
26308 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26309 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
26310 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26311 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
26312 ; AVX512VPOPCNTDQ-NEXT: retq
26314 ; AVX512VPOPCNTDQVL-LABEL: ugt_32_v2i64:
26315 ; AVX512VPOPCNTDQVL: # %bb.0:
26316 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
26317 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32]
26318 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
26319 ; AVX512VPOPCNTDQVL-NEXT: retq
26321 ; BITALG_NOVLX-LABEL: ugt_32_v2i64:
26322 ; BITALG_NOVLX: # %bb.0:
26323 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26324 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
26325 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
26326 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26327 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26328 ; BITALG_NOVLX-NEXT: vzeroupper
26329 ; BITALG_NOVLX-NEXT: retq
26331 ; BITALG-LABEL: ugt_32_v2i64:
26333 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
26334 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
26335 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26336 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [32,32]
26337 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
26338 ; BITALG-NEXT: retq
; IR under test: per-lane popcount, unsigned "greater than 32" compare, then
; sign-extension of the i1 mask to i64 lanes.
26339 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
26340 %3 = icmp ugt <2 x i64> %2, <i64 32, i64 32>
26341 %4 = sext <2 x i1> %3 to <2 x i64>
; Test case: for each i64 lane of %0, is the population count unsigned-less-than 33?
; The IR at the bottom of this block calls @llvm.ctpop.v2i64, applies `icmp ult`
; against a splat of 33, and sign-extends the <2 x i1> result to <2 x i64>.
; The prefixed expectation lines in between are autogenerated (the NOTE on the
; first line of the file names utils/update_llc_test_checks.py); regenerate them
; with that script instead of editing them by hand.
; Lowerings visible in the expectations: the SSE2 and SSE3 sequences count bits with
; shift/mask/add steps, the SSSE3 through AVX2 sequences use pshufb on a 16-entry
; table, and the AVX512 sequences use vpopcntq or vpopcntb. In every variant the
; constant 33 is materialized in a register and used as the left-hand side of a
; signed greater-than compare.
26345 define <2 x i64> @ult_33_v2i64(<2 x i64> %0) {
26346 ; SSE2-LABEL: ult_33_v2i64:
26348 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26349 ; SSE2-NEXT: psrlw $1, %xmm1
26350 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26351 ; SSE2-NEXT: psubb %xmm1, %xmm0
26352 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26353 ; SSE2-NEXT: movdqa %xmm0, %xmm2
26354 ; SSE2-NEXT: pand %xmm1, %xmm2
26355 ; SSE2-NEXT: psrlw $2, %xmm0
26356 ; SSE2-NEXT: pand %xmm1, %xmm0
26357 ; SSE2-NEXT: paddb %xmm2, %xmm0
26358 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26359 ; SSE2-NEXT: psrlw $4, %xmm1
26360 ; SSE2-NEXT: paddb %xmm0, %xmm1
26361 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26362 ; SSE2-NEXT: pxor %xmm0, %xmm0
26363 ; SSE2-NEXT: psadbw %xmm1, %xmm0
26364 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26365 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [33,33,33,33]
26366 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
26369 ; SSE3-LABEL: ult_33_v2i64:
26371 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26372 ; SSE3-NEXT: psrlw $1, %xmm1
26373 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26374 ; SSE3-NEXT: psubb %xmm1, %xmm0
26375 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26376 ; SSE3-NEXT: movdqa %xmm0, %xmm2
26377 ; SSE3-NEXT: pand %xmm1, %xmm2
26378 ; SSE3-NEXT: psrlw $2, %xmm0
26379 ; SSE3-NEXT: pand %xmm1, %xmm0
26380 ; SSE3-NEXT: paddb %xmm2, %xmm0
26381 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26382 ; SSE3-NEXT: psrlw $4, %xmm1
26383 ; SSE3-NEXT: paddb %xmm0, %xmm1
26384 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26385 ; SSE3-NEXT: pxor %xmm0, %xmm0
26386 ; SSE3-NEXT: psadbw %xmm1, %xmm0
26387 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26388 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [33,33,33,33]
26389 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
26392 ; SSSE3-LABEL: ult_33_v2i64:
26394 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26395 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
26396 ; SSSE3-NEXT: pand %xmm1, %xmm2
26397 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26398 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
26399 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
26400 ; SSSE3-NEXT: psrlw $4, %xmm0
26401 ; SSSE3-NEXT: pand %xmm1, %xmm0
26402 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
26403 ; SSSE3-NEXT: paddb %xmm4, %xmm3
26404 ; SSSE3-NEXT: pxor %xmm0, %xmm0
26405 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
26406 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26407 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [33,33,33,33]
26408 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
26411 ; SSE41-LABEL: ult_33_v2i64:
26413 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26414 ; SSE41-NEXT: movdqa %xmm0, %xmm2
26415 ; SSE41-NEXT: pand %xmm1, %xmm2
26416 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26417 ; SSE41-NEXT: movdqa %xmm3, %xmm4
26418 ; SSE41-NEXT: pshufb %xmm2, %xmm4
26419 ; SSE41-NEXT: psrlw $4, %xmm0
26420 ; SSE41-NEXT: pand %xmm1, %xmm0
26421 ; SSE41-NEXT: pshufb %xmm0, %xmm3
26422 ; SSE41-NEXT: paddb %xmm4, %xmm3
26423 ; SSE41-NEXT: pxor %xmm0, %xmm0
26424 ; SSE41-NEXT: psadbw %xmm3, %xmm0
26425 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26426 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [33,33,33,33]
26427 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
26430 ; AVX1-LABEL: ult_33_v2i64:
26432 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26433 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
26434 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26435 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26436 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
26437 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
26438 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26439 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26440 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
26441 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26442 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [33,33]
26443 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26446 ; AVX2-LABEL: ult_33_v2i64:
26448 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26449 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
26450 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26451 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26452 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
26453 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
26454 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26455 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26456 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
26457 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26458 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [33,33]
26459 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26462 ; AVX512VPOPCNTDQ-LABEL: ult_33_v2i64:
26463 ; AVX512VPOPCNTDQ: # %bb.0:
26464 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26465 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
26466 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [33,33]
26467 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26468 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
26469 ; AVX512VPOPCNTDQ-NEXT: retq
26471 ; AVX512VPOPCNTDQVL-LABEL: ult_33_v2i64:
26472 ; AVX512VPOPCNTDQVL: # %bb.0:
26473 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
26474 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33]
26475 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26476 ; AVX512VPOPCNTDQVL-NEXT: retq
26478 ; BITALG_NOVLX-LABEL: ult_33_v2i64:
26479 ; BITALG_NOVLX: # %bb.0:
26480 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26481 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
26482 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
26483 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26484 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [33,33]
26485 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26486 ; BITALG_NOVLX-NEXT: vzeroupper
26487 ; BITALG_NOVLX-NEXT: retq
26489 ; BITALG-LABEL: ult_33_v2i64:
26491 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
26492 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
26493 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26494 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33]
26495 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26496 ; BITALG-NEXT: retq
; IR under test: per-lane popcount, unsigned "less than 33" compare, then
; sign-extension of the i1 mask to i64 lanes.
26497 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
26498 %3 = icmp ult <2 x i64> %2, <i64 33, i64 33>
26499 %4 = sext <2 x i1> %3 to <2 x i64>
; Test case: for each i64 lane of %0, is the population count unsigned-greater-than 33?
; The IR at the bottom of this block calls @llvm.ctpop.v2i64, applies `icmp ugt`
; against a splat of 33, and sign-extends the <2 x i1> result to <2 x i64>.
; The prefixed expectation lines in between are autogenerated (the NOTE on the
; first line of the file names utils/update_llc_test_checks.py); regenerate them
; with that script instead of editing them by hand.
; Lowerings visible in the expectations: the SSE2 and SSE3 sequences count bits with
; shift/mask/add steps, the SSSE3 through AVX2 sequences use pshufb on a 16-entry
; table, and the AVX512 sequences use vpopcntq or vpopcntb.
26503 define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) {
26504 ; SSE2-LABEL: ugt_33_v2i64:
26506 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26507 ; SSE2-NEXT: psrlw $1, %xmm1
26508 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26509 ; SSE2-NEXT: psubb %xmm1, %xmm0
26510 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26511 ; SSE2-NEXT: movdqa %xmm0, %xmm2
26512 ; SSE2-NEXT: pand %xmm1, %xmm2
26513 ; SSE2-NEXT: psrlw $2, %xmm0
26514 ; SSE2-NEXT: pand %xmm1, %xmm0
26515 ; SSE2-NEXT: paddb %xmm2, %xmm0
26516 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26517 ; SSE2-NEXT: psrlw $4, %xmm1
26518 ; SSE2-NEXT: paddb %xmm0, %xmm1
26519 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26520 ; SSE2-NEXT: pxor %xmm0, %xmm0
26521 ; SSE2-NEXT: psadbw %xmm1, %xmm0
26522 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
26523 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
26526 ; SSE3-LABEL: ugt_33_v2i64:
26528 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26529 ; SSE3-NEXT: psrlw $1, %xmm1
26530 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26531 ; SSE3-NEXT: psubb %xmm1, %xmm0
26532 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26533 ; SSE3-NEXT: movdqa %xmm0, %xmm2
26534 ; SSE3-NEXT: pand %xmm1, %xmm2
26535 ; SSE3-NEXT: psrlw $2, %xmm0
26536 ; SSE3-NEXT: pand %xmm1, %xmm0
26537 ; SSE3-NEXT: paddb %xmm2, %xmm0
26538 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26539 ; SSE3-NEXT: psrlw $4, %xmm1
26540 ; SSE3-NEXT: paddb %xmm0, %xmm1
26541 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26542 ; SSE3-NEXT: pxor %xmm0, %xmm0
26543 ; SSE3-NEXT: psadbw %xmm1, %xmm0
26544 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
26545 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
26548 ; SSSE3-LABEL: ugt_33_v2i64:
26550 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26551 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
26552 ; SSSE3-NEXT: pand %xmm1, %xmm2
26553 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26554 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
26555 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
26556 ; SSSE3-NEXT: psrlw $4, %xmm0
26557 ; SSSE3-NEXT: pand %xmm1, %xmm0
26558 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
26559 ; SSSE3-NEXT: paddb %xmm4, %xmm3
26560 ; SSSE3-NEXT: pxor %xmm0, %xmm0
26561 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
26562 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
26563 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
26566 ; SSE41-LABEL: ugt_33_v2i64:
26568 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26569 ; SSE41-NEXT: movdqa %xmm0, %xmm2
26570 ; SSE41-NEXT: pand %xmm1, %xmm2
26571 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26572 ; SSE41-NEXT: movdqa %xmm3, %xmm4
26573 ; SSE41-NEXT: pshufb %xmm2, %xmm4
26574 ; SSE41-NEXT: psrlw $4, %xmm0
26575 ; SSE41-NEXT: pand %xmm1, %xmm0
26576 ; SSE41-NEXT: pshufb %xmm0, %xmm3
26577 ; SSE41-NEXT: paddb %xmm4, %xmm3
26578 ; SSE41-NEXT: pxor %xmm0, %xmm0
26579 ; SSE41-NEXT: psadbw %xmm3, %xmm0
26580 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
26581 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
26584 ; AVX1-LABEL: ugt_33_v2i64:
26586 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26587 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
26588 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26589 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26590 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
26591 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
26592 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26593 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26594 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
26595 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26596 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26599 ; AVX2-LABEL: ugt_33_v2i64:
26601 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26602 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
26603 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26604 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26605 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
26606 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
26607 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26608 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26609 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
26610 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26611 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26614 ; AVX512VPOPCNTDQ-LABEL: ugt_33_v2i64:
26615 ; AVX512VPOPCNTDQ: # %bb.0:
26616 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26617 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
26618 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26619 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
26620 ; AVX512VPOPCNTDQ-NEXT: retq
26622 ; AVX512VPOPCNTDQVL-LABEL: ugt_33_v2i64:
26623 ; AVX512VPOPCNTDQVL: # %bb.0:
26624 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
26625 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33]
26626 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
26627 ; AVX512VPOPCNTDQVL-NEXT: retq
26629 ; BITALG_NOVLX-LABEL: ugt_33_v2i64:
26630 ; BITALG_NOVLX: # %bb.0:
26631 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26632 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
26633 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
26634 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26635 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26636 ; BITALG_NOVLX-NEXT: vzeroupper
26637 ; BITALG_NOVLX-NEXT: retq
26639 ; BITALG-LABEL: ugt_33_v2i64:
26641 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
26642 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
26643 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26644 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [33,33]
26645 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
26646 ; BITALG-NEXT: retq
; IR under test: per-lane popcount, unsigned "greater than 33" compare, then
; sign-extension of the i1 mask to i64 lanes.
26647 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
26648 %3 = icmp ugt <2 x i64> %2, <i64 33, i64 33>
26649 %4 = sext <2 x i1> %3 to <2 x i64>
; Test case: for each i64 lane of %0, is the population count unsigned-less-than 34?
; The IR at the bottom of this block calls @llvm.ctpop.v2i64, applies `icmp ult`
; against a splat of 34, and sign-extends the <2 x i1> result to <2 x i64>.
; The prefixed expectation lines in between are autogenerated (the NOTE on the
; first line of the file names utils/update_llc_test_checks.py); regenerate them
; with that script instead of editing them by hand.
; Lowerings visible in the expectations: the SSE2 and SSE3 sequences count bits with
; shift/mask/add steps, the SSSE3 through AVX2 sequences use pshufb on a 16-entry
; table, and the AVX512 sequences use vpopcntq or vpopcntb. In every variant the
; constant 34 is materialized in a register and used as the left-hand side of a
; signed greater-than compare.
26653 define <2 x i64> @ult_34_v2i64(<2 x i64> %0) {
26654 ; SSE2-LABEL: ult_34_v2i64:
26656 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26657 ; SSE2-NEXT: psrlw $1, %xmm1
26658 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26659 ; SSE2-NEXT: psubb %xmm1, %xmm0
26660 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26661 ; SSE2-NEXT: movdqa %xmm0, %xmm2
26662 ; SSE2-NEXT: pand %xmm1, %xmm2
26663 ; SSE2-NEXT: psrlw $2, %xmm0
26664 ; SSE2-NEXT: pand %xmm1, %xmm0
26665 ; SSE2-NEXT: paddb %xmm2, %xmm0
26666 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26667 ; SSE2-NEXT: psrlw $4, %xmm1
26668 ; SSE2-NEXT: paddb %xmm0, %xmm1
26669 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26670 ; SSE2-NEXT: pxor %xmm0, %xmm0
26671 ; SSE2-NEXT: psadbw %xmm1, %xmm0
26672 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26673 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [34,34,34,34]
26674 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
26677 ; SSE3-LABEL: ult_34_v2i64:
26679 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26680 ; SSE3-NEXT: psrlw $1, %xmm1
26681 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26682 ; SSE3-NEXT: psubb %xmm1, %xmm0
26683 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26684 ; SSE3-NEXT: movdqa %xmm0, %xmm2
26685 ; SSE3-NEXT: pand %xmm1, %xmm2
26686 ; SSE3-NEXT: psrlw $2, %xmm0
26687 ; SSE3-NEXT: pand %xmm1, %xmm0
26688 ; SSE3-NEXT: paddb %xmm2, %xmm0
26689 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26690 ; SSE3-NEXT: psrlw $4, %xmm1
26691 ; SSE3-NEXT: paddb %xmm0, %xmm1
26692 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26693 ; SSE3-NEXT: pxor %xmm0, %xmm0
26694 ; SSE3-NEXT: psadbw %xmm1, %xmm0
26695 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26696 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [34,34,34,34]
26697 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
26700 ; SSSE3-LABEL: ult_34_v2i64:
26702 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26703 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
26704 ; SSSE3-NEXT: pand %xmm1, %xmm2
26705 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26706 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
26707 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
26708 ; SSSE3-NEXT: psrlw $4, %xmm0
26709 ; SSSE3-NEXT: pand %xmm1, %xmm0
26710 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
26711 ; SSSE3-NEXT: paddb %xmm4, %xmm3
26712 ; SSSE3-NEXT: pxor %xmm0, %xmm0
26713 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
26714 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26715 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [34,34,34,34]
26716 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
26719 ; SSE41-LABEL: ult_34_v2i64:
26721 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26722 ; SSE41-NEXT: movdqa %xmm0, %xmm2
26723 ; SSE41-NEXT: pand %xmm1, %xmm2
26724 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26725 ; SSE41-NEXT: movdqa %xmm3, %xmm4
26726 ; SSE41-NEXT: pshufb %xmm2, %xmm4
26727 ; SSE41-NEXT: psrlw $4, %xmm0
26728 ; SSE41-NEXT: pand %xmm1, %xmm0
26729 ; SSE41-NEXT: pshufb %xmm0, %xmm3
26730 ; SSE41-NEXT: paddb %xmm4, %xmm3
26731 ; SSE41-NEXT: pxor %xmm0, %xmm0
26732 ; SSE41-NEXT: psadbw %xmm3, %xmm0
26733 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26734 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [34,34,34,34]
26735 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
26738 ; AVX1-LABEL: ult_34_v2i64:
26740 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26741 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
26742 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26743 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26744 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
26745 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
26746 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26747 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26748 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
26749 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26750 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [34,34]
26751 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26754 ; AVX2-LABEL: ult_34_v2i64:
26756 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26757 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
26758 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26759 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26760 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
26761 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
26762 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26763 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26764 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
26765 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26766 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [34,34]
26767 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26770 ; AVX512VPOPCNTDQ-LABEL: ult_34_v2i64:
26771 ; AVX512VPOPCNTDQ: # %bb.0:
26772 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26773 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
26774 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [34,34]
26775 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26776 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
26777 ; AVX512VPOPCNTDQ-NEXT: retq
26779 ; AVX512VPOPCNTDQVL-LABEL: ult_34_v2i64:
26780 ; AVX512VPOPCNTDQVL: # %bb.0:
26781 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
26782 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34]
26783 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26784 ; AVX512VPOPCNTDQVL-NEXT: retq
26786 ; BITALG_NOVLX-LABEL: ult_34_v2i64:
26787 ; BITALG_NOVLX: # %bb.0:
26788 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26789 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
26790 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
26791 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26792 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [34,34]
26793 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26794 ; BITALG_NOVLX-NEXT: vzeroupper
26795 ; BITALG_NOVLX-NEXT: retq
26797 ; BITALG-LABEL: ult_34_v2i64:
26799 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
26800 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
26801 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26802 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34]
26803 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
26804 ; BITALG-NEXT: retq
; IR under test: per-lane popcount, unsigned "less than 34" compare, then
; sign-extension of the i1 mask to i64 lanes.
26805 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
26806 %3 = icmp ult <2 x i64> %2, <i64 34, i64 34>
26807 %4 = sext <2 x i1> %3 to <2 x i64>
26811 define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) {
26812 ; SSE2-LABEL: ugt_34_v2i64:
26814 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26815 ; SSE2-NEXT: psrlw $1, %xmm1
26816 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26817 ; SSE2-NEXT: psubb %xmm1, %xmm0
26818 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26819 ; SSE2-NEXT: movdqa %xmm0, %xmm2
26820 ; SSE2-NEXT: pand %xmm1, %xmm2
26821 ; SSE2-NEXT: psrlw $2, %xmm0
26822 ; SSE2-NEXT: pand %xmm1, %xmm0
26823 ; SSE2-NEXT: paddb %xmm2, %xmm0
26824 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26825 ; SSE2-NEXT: psrlw $4, %xmm1
26826 ; SSE2-NEXT: paddb %xmm0, %xmm1
26827 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26828 ; SSE2-NEXT: pxor %xmm0, %xmm0
26829 ; SSE2-NEXT: psadbw %xmm1, %xmm0
26830 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
26831 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
26834 ; SSE3-LABEL: ugt_34_v2i64:
26836 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26837 ; SSE3-NEXT: psrlw $1, %xmm1
26838 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26839 ; SSE3-NEXT: psubb %xmm1, %xmm0
26840 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26841 ; SSE3-NEXT: movdqa %xmm0, %xmm2
26842 ; SSE3-NEXT: pand %xmm1, %xmm2
26843 ; SSE3-NEXT: psrlw $2, %xmm0
26844 ; SSE3-NEXT: pand %xmm1, %xmm0
26845 ; SSE3-NEXT: paddb %xmm2, %xmm0
26846 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26847 ; SSE3-NEXT: psrlw $4, %xmm1
26848 ; SSE3-NEXT: paddb %xmm0, %xmm1
26849 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26850 ; SSE3-NEXT: pxor %xmm0, %xmm0
26851 ; SSE3-NEXT: psadbw %xmm1, %xmm0
26852 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
26853 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
26856 ; SSSE3-LABEL: ugt_34_v2i64:
26858 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26859 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
26860 ; SSSE3-NEXT: pand %xmm1, %xmm2
26861 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26862 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
26863 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
26864 ; SSSE3-NEXT: psrlw $4, %xmm0
26865 ; SSSE3-NEXT: pand %xmm1, %xmm0
26866 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
26867 ; SSSE3-NEXT: paddb %xmm4, %xmm3
26868 ; SSSE3-NEXT: pxor %xmm0, %xmm0
26869 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
26870 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
26871 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
26874 ; SSE41-LABEL: ugt_34_v2i64:
26876 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26877 ; SSE41-NEXT: movdqa %xmm0, %xmm2
26878 ; SSE41-NEXT: pand %xmm1, %xmm2
26879 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26880 ; SSE41-NEXT: movdqa %xmm3, %xmm4
26881 ; SSE41-NEXT: pshufb %xmm2, %xmm4
26882 ; SSE41-NEXT: psrlw $4, %xmm0
26883 ; SSE41-NEXT: pand %xmm1, %xmm0
26884 ; SSE41-NEXT: pshufb %xmm0, %xmm3
26885 ; SSE41-NEXT: paddb %xmm4, %xmm3
26886 ; SSE41-NEXT: pxor %xmm0, %xmm0
26887 ; SSE41-NEXT: psadbw %xmm3, %xmm0
26888 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
26889 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
26892 ; AVX1-LABEL: ugt_34_v2i64:
26894 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26895 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
26896 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26897 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26898 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
26899 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
26900 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26901 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26902 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
26903 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26904 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26907 ; AVX2-LABEL: ugt_34_v2i64:
26909 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
26910 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
26911 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
26912 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
26913 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
26914 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
26915 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
26916 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
26917 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
26918 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26919 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26922 ; AVX512VPOPCNTDQ-LABEL: ugt_34_v2i64:
26923 ; AVX512VPOPCNTDQ: # %bb.0:
26924 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26925 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
26926 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26927 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
26928 ; AVX512VPOPCNTDQ-NEXT: retq
26930 ; AVX512VPOPCNTDQVL-LABEL: ugt_34_v2i64:
26931 ; AVX512VPOPCNTDQVL: # %bb.0:
26932 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
26933 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34]
26934 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
26935 ; AVX512VPOPCNTDQVL-NEXT: retq
26937 ; BITALG_NOVLX-LABEL: ugt_34_v2i64:
26938 ; BITALG_NOVLX: # %bb.0:
26939 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
26940 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
26941 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
26942 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26943 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
26944 ; BITALG_NOVLX-NEXT: vzeroupper
26945 ; BITALG_NOVLX-NEXT: retq
26947 ; BITALG-LABEL: ugt_34_v2i64:
26949 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
26950 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
26951 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
26952 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [34,34]
26953 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
26954 ; BITALG-NEXT: retq
26955 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
26956 %3 = icmp ugt <2 x i64> %2, <i64 34, i64 34>
26957 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_35_v2i64 - per-lane test "popcount(x) < 35" on a <2 x i64> vector.
; The IR at the bottom calls @llvm.ctpop.v2i64, compares each lane with
; `icmp ult` against the splat constant 35, and sign-extends the <2 x i1>
; mask to <2 x i64> (all-ones where the compare holds, zero otherwise).
; The comment lines in between are the expected llc output for each FileCheck
; prefix; per the file header they are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
; The pre-AVX prefixes expect a dword compare (pcmpgtd) after a pshufd
; [0,0,2,2] of the psadbw result; the AVX and later prefixes expect vpcmpgtq.
26961 define <2 x i64> @ult_35_v2i64(<2 x i64> %0) {
26962 ; SSE2-LABEL: ult_35_v2i64:
26964 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26965 ; SSE2-NEXT: psrlw $1, %xmm1
26966 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26967 ; SSE2-NEXT: psubb %xmm1, %xmm0
26968 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26969 ; SSE2-NEXT: movdqa %xmm0, %xmm2
26970 ; SSE2-NEXT: pand %xmm1, %xmm2
26971 ; SSE2-NEXT: psrlw $2, %xmm0
26972 ; SSE2-NEXT: pand %xmm1, %xmm0
26973 ; SSE2-NEXT: paddb %xmm2, %xmm0
26974 ; SSE2-NEXT: movdqa %xmm0, %xmm1
26975 ; SSE2-NEXT: psrlw $4, %xmm1
26976 ; SSE2-NEXT: paddb %xmm0, %xmm1
26977 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26978 ; SSE2-NEXT: pxor %xmm0, %xmm0
26979 ; SSE2-NEXT: psadbw %xmm1, %xmm0
26980 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
26981 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [35,35,35,35]
26982 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
26985 ; SSE3-LABEL: ult_35_v2i64:
26987 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26988 ; SSE3-NEXT: psrlw $1, %xmm1
26989 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
26990 ; SSE3-NEXT: psubb %xmm1, %xmm0
26991 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
26992 ; SSE3-NEXT: movdqa %xmm0, %xmm2
26993 ; SSE3-NEXT: pand %xmm1, %xmm2
26994 ; SSE3-NEXT: psrlw $2, %xmm0
26995 ; SSE3-NEXT: pand %xmm1, %xmm0
26996 ; SSE3-NEXT: paddb %xmm2, %xmm0
26997 ; SSE3-NEXT: movdqa %xmm0, %xmm1
26998 ; SSE3-NEXT: psrlw $4, %xmm1
26999 ; SSE3-NEXT: paddb %xmm0, %xmm1
27000 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27001 ; SSE3-NEXT: pxor %xmm0, %xmm0
27002 ; SSE3-NEXT: psadbw %xmm1, %xmm0
27003 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27004 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [35,35,35,35]
27005 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
27008 ; SSSE3-LABEL: ult_35_v2i64:
27010 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27011 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
27012 ; SSSE3-NEXT: pand %xmm1, %xmm2
27013 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27014 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
27015 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
27016 ; SSSE3-NEXT: psrlw $4, %xmm0
27017 ; SSSE3-NEXT: pand %xmm1, %xmm0
27018 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
27019 ; SSSE3-NEXT: paddb %xmm4, %xmm3
27020 ; SSSE3-NEXT: pxor %xmm0, %xmm0
27021 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
27022 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27023 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [35,35,35,35]
27024 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
27027 ; SSE41-LABEL: ult_35_v2i64:
27029 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27030 ; SSE41-NEXT: movdqa %xmm0, %xmm2
27031 ; SSE41-NEXT: pand %xmm1, %xmm2
27032 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27033 ; SSE41-NEXT: movdqa %xmm3, %xmm4
27034 ; SSE41-NEXT: pshufb %xmm2, %xmm4
27035 ; SSE41-NEXT: psrlw $4, %xmm0
27036 ; SSE41-NEXT: pand %xmm1, %xmm0
27037 ; SSE41-NEXT: pshufb %xmm0, %xmm3
27038 ; SSE41-NEXT: paddb %xmm4, %xmm3
27039 ; SSE41-NEXT: pxor %xmm0, %xmm0
27040 ; SSE41-NEXT: psadbw %xmm3, %xmm0
27041 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27042 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [35,35,35,35]
27043 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
27046 ; AVX1-LABEL: ult_35_v2i64:
27048 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27049 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
27050 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27051 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27052 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
27053 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
27054 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27055 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27056 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
27057 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27058 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [35,35]
27059 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27062 ; AVX2-LABEL: ult_35_v2i64:
27064 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27065 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
27066 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27067 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27068 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
27069 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
27070 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27071 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27072 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
27073 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27074 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [35,35]
27075 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27078 ; AVX512VPOPCNTDQ-LABEL: ult_35_v2i64:
27079 ; AVX512VPOPCNTDQ: # %bb.0:
27080 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27081 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
27082 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [35,35]
27083 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27084 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
27085 ; AVX512VPOPCNTDQ-NEXT: retq
27087 ; AVX512VPOPCNTDQVL-LABEL: ult_35_v2i64:
27088 ; AVX512VPOPCNTDQVL: # %bb.0:
27089 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
27090 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35]
27091 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27092 ; AVX512VPOPCNTDQVL-NEXT: retq
27094 ; BITALG_NOVLX-LABEL: ult_35_v2i64:
27095 ; BITALG_NOVLX: # %bb.0:
27096 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27097 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
27098 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
27099 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27100 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [35,35]
27101 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27102 ; BITALG_NOVLX-NEXT: vzeroupper
27103 ; BITALG_NOVLX-NEXT: retq
27105 ; BITALG-LABEL: ult_35_v2i64:
27107 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
27108 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
27109 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27110 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35]
27111 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27112 ; BITALG-NEXT: retq
; IR under test - ctpop, unsigned less-than 35 per lane, then sign-extend.
27113 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
27114 %3 = icmp ult <2 x i64> %2, <i64 35, i64 35>
27115 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_35_v2i64 - per-lane test "popcount(x) > 35" on a <2 x i64> vector.
; The IR at the bottom calls @llvm.ctpop.v2i64, compares each lane with
; `icmp ugt` against the splat constant 35, and sign-extends the <2 x i1>
; mask to <2 x i64> (all-ones where the compare holds, zero otherwise).
; The comment lines in between are the expected llc output for each FileCheck
; prefix; per the file header they are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
; The pre-AVX prefixes expect a dword compare (pcmpgtd) after a pshufd
; [0,0,2,2] of the psadbw result; the AVX and later prefixes expect vpcmpgtq.
27119 define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) {
27120 ; SSE2-LABEL: ugt_35_v2i64:
27122 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27123 ; SSE2-NEXT: psrlw $1, %xmm1
27124 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27125 ; SSE2-NEXT: psubb %xmm1, %xmm0
27126 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27127 ; SSE2-NEXT: movdqa %xmm0, %xmm2
27128 ; SSE2-NEXT: pand %xmm1, %xmm2
27129 ; SSE2-NEXT: psrlw $2, %xmm0
27130 ; SSE2-NEXT: pand %xmm1, %xmm0
27131 ; SSE2-NEXT: paddb %xmm2, %xmm0
27132 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27133 ; SSE2-NEXT: psrlw $4, %xmm1
27134 ; SSE2-NEXT: paddb %xmm0, %xmm1
27135 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27136 ; SSE2-NEXT: pxor %xmm0, %xmm0
27137 ; SSE2-NEXT: psadbw %xmm1, %xmm0
27138 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
27139 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
27142 ; SSE3-LABEL: ugt_35_v2i64:
27144 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27145 ; SSE3-NEXT: psrlw $1, %xmm1
27146 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27147 ; SSE3-NEXT: psubb %xmm1, %xmm0
27148 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27149 ; SSE3-NEXT: movdqa %xmm0, %xmm2
27150 ; SSE3-NEXT: pand %xmm1, %xmm2
27151 ; SSE3-NEXT: psrlw $2, %xmm0
27152 ; SSE3-NEXT: pand %xmm1, %xmm0
27153 ; SSE3-NEXT: paddb %xmm2, %xmm0
27154 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27155 ; SSE3-NEXT: psrlw $4, %xmm1
27156 ; SSE3-NEXT: paddb %xmm0, %xmm1
27157 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27158 ; SSE3-NEXT: pxor %xmm0, %xmm0
27159 ; SSE3-NEXT: psadbw %xmm1, %xmm0
27160 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
27161 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
27164 ; SSSE3-LABEL: ugt_35_v2i64:
27166 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27167 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
27168 ; SSSE3-NEXT: pand %xmm1, %xmm2
27169 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27170 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
27171 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
27172 ; SSSE3-NEXT: psrlw $4, %xmm0
27173 ; SSSE3-NEXT: pand %xmm1, %xmm0
27174 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
27175 ; SSSE3-NEXT: paddb %xmm4, %xmm3
27176 ; SSSE3-NEXT: pxor %xmm0, %xmm0
27177 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
27178 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
27179 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
27182 ; SSE41-LABEL: ugt_35_v2i64:
27184 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27185 ; SSE41-NEXT: movdqa %xmm0, %xmm2
27186 ; SSE41-NEXT: pand %xmm1, %xmm2
27187 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27188 ; SSE41-NEXT: movdqa %xmm3, %xmm4
27189 ; SSE41-NEXT: pshufb %xmm2, %xmm4
27190 ; SSE41-NEXT: psrlw $4, %xmm0
27191 ; SSE41-NEXT: pand %xmm1, %xmm0
27192 ; SSE41-NEXT: pshufb %xmm0, %xmm3
27193 ; SSE41-NEXT: paddb %xmm4, %xmm3
27194 ; SSE41-NEXT: pxor %xmm0, %xmm0
27195 ; SSE41-NEXT: psadbw %xmm3, %xmm0
27196 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
27197 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
27200 ; AVX1-LABEL: ugt_35_v2i64:
27202 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27203 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
27204 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27205 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27206 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
27207 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
27208 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27209 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27210 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
27211 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27212 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27215 ; AVX2-LABEL: ugt_35_v2i64:
27217 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27218 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
27219 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27220 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27221 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
27222 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
27223 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27224 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27225 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
27226 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27227 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27230 ; AVX512VPOPCNTDQ-LABEL: ugt_35_v2i64:
27231 ; AVX512VPOPCNTDQ: # %bb.0:
27232 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27233 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
27234 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27235 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
27236 ; AVX512VPOPCNTDQ-NEXT: retq
27238 ; AVX512VPOPCNTDQVL-LABEL: ugt_35_v2i64:
27239 ; AVX512VPOPCNTDQVL: # %bb.0:
27240 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
27241 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35]
27242 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
27243 ; AVX512VPOPCNTDQVL-NEXT: retq
27245 ; BITALG_NOVLX-LABEL: ugt_35_v2i64:
27246 ; BITALG_NOVLX: # %bb.0:
27247 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27248 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
27249 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
27250 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27251 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27252 ; BITALG_NOVLX-NEXT: vzeroupper
27253 ; BITALG_NOVLX-NEXT: retq
27255 ; BITALG-LABEL: ugt_35_v2i64:
27257 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
27258 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
27259 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27260 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [35,35]
27261 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
27262 ; BITALG-NEXT: retq
; IR under test - ctpop, unsigned greater-than 35 per lane, then sign-extend.
27263 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
27264 %3 = icmp ugt <2 x i64> %2, <i64 35, i64 35>
27265 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_36_v2i64 - per-lane test "popcount(x) < 36" on a <2 x i64> vector.
; The IR at the bottom calls @llvm.ctpop.v2i64, compares each lane with
; `icmp ult` against the splat constant 36, and sign-extends the <2 x i1>
; mask to <2 x i64> (all-ones where the compare holds, zero otherwise).
; The comment lines in between are the expected llc output for each FileCheck
; prefix; per the file header they are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
; The pre-AVX prefixes expect a dword compare (pcmpgtd) after a pshufd
; [0,0,2,2] of the psadbw result; the AVX and later prefixes expect vpcmpgtq.
27269 define <2 x i64> @ult_36_v2i64(<2 x i64> %0) {
27270 ; SSE2-LABEL: ult_36_v2i64:
27272 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27273 ; SSE2-NEXT: psrlw $1, %xmm1
27274 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27275 ; SSE2-NEXT: psubb %xmm1, %xmm0
27276 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27277 ; SSE2-NEXT: movdqa %xmm0, %xmm2
27278 ; SSE2-NEXT: pand %xmm1, %xmm2
27279 ; SSE2-NEXT: psrlw $2, %xmm0
27280 ; SSE2-NEXT: pand %xmm1, %xmm0
27281 ; SSE2-NEXT: paddb %xmm2, %xmm0
27282 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27283 ; SSE2-NEXT: psrlw $4, %xmm1
27284 ; SSE2-NEXT: paddb %xmm0, %xmm1
27285 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27286 ; SSE2-NEXT: pxor %xmm0, %xmm0
27287 ; SSE2-NEXT: psadbw %xmm1, %xmm0
27288 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27289 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [36,36,36,36]
27290 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
27293 ; SSE3-LABEL: ult_36_v2i64:
27295 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27296 ; SSE3-NEXT: psrlw $1, %xmm1
27297 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27298 ; SSE3-NEXT: psubb %xmm1, %xmm0
27299 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27300 ; SSE3-NEXT: movdqa %xmm0, %xmm2
27301 ; SSE3-NEXT: pand %xmm1, %xmm2
27302 ; SSE3-NEXT: psrlw $2, %xmm0
27303 ; SSE3-NEXT: pand %xmm1, %xmm0
27304 ; SSE3-NEXT: paddb %xmm2, %xmm0
27305 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27306 ; SSE3-NEXT: psrlw $4, %xmm1
27307 ; SSE3-NEXT: paddb %xmm0, %xmm1
27308 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27309 ; SSE3-NEXT: pxor %xmm0, %xmm0
27310 ; SSE3-NEXT: psadbw %xmm1, %xmm0
27311 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27312 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [36,36,36,36]
27313 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
27316 ; SSSE3-LABEL: ult_36_v2i64:
27318 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27319 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
27320 ; SSSE3-NEXT: pand %xmm1, %xmm2
27321 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27322 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
27323 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
27324 ; SSSE3-NEXT: psrlw $4, %xmm0
27325 ; SSSE3-NEXT: pand %xmm1, %xmm0
27326 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
27327 ; SSSE3-NEXT: paddb %xmm4, %xmm3
27328 ; SSSE3-NEXT: pxor %xmm0, %xmm0
27329 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
27330 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27331 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [36,36,36,36]
27332 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
27335 ; SSE41-LABEL: ult_36_v2i64:
27337 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27338 ; SSE41-NEXT: movdqa %xmm0, %xmm2
27339 ; SSE41-NEXT: pand %xmm1, %xmm2
27340 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27341 ; SSE41-NEXT: movdqa %xmm3, %xmm4
27342 ; SSE41-NEXT: pshufb %xmm2, %xmm4
27343 ; SSE41-NEXT: psrlw $4, %xmm0
27344 ; SSE41-NEXT: pand %xmm1, %xmm0
27345 ; SSE41-NEXT: pshufb %xmm0, %xmm3
27346 ; SSE41-NEXT: paddb %xmm4, %xmm3
27347 ; SSE41-NEXT: pxor %xmm0, %xmm0
27348 ; SSE41-NEXT: psadbw %xmm3, %xmm0
27349 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27350 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [36,36,36,36]
27351 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
27354 ; AVX1-LABEL: ult_36_v2i64:
27356 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27357 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
27358 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27359 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27360 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
27361 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
27362 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27363 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27364 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
27365 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27366 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [36,36]
27367 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27370 ; AVX2-LABEL: ult_36_v2i64:
27372 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27373 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
27374 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27375 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27376 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
27377 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
27378 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27379 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27380 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
27381 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27382 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [36,36]
27383 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27386 ; AVX512VPOPCNTDQ-LABEL: ult_36_v2i64:
27387 ; AVX512VPOPCNTDQ: # %bb.0:
27388 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27389 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
27390 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [36,36]
27391 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27392 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
27393 ; AVX512VPOPCNTDQ-NEXT: retq
27395 ; AVX512VPOPCNTDQVL-LABEL: ult_36_v2i64:
27396 ; AVX512VPOPCNTDQVL: # %bb.0:
27397 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
27398 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36]
27399 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27400 ; AVX512VPOPCNTDQVL-NEXT: retq
27402 ; BITALG_NOVLX-LABEL: ult_36_v2i64:
27403 ; BITALG_NOVLX: # %bb.0:
27404 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27405 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
27406 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
27407 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27408 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [36,36]
27409 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27410 ; BITALG_NOVLX-NEXT: vzeroupper
27411 ; BITALG_NOVLX-NEXT: retq
27413 ; BITALG-LABEL: ult_36_v2i64:
27415 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
27416 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
27417 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27418 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36]
27419 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27420 ; BITALG-NEXT: retq
; IR under test - ctpop, unsigned less-than 36 per lane, then sign-extend.
27421 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
27422 %3 = icmp ult <2 x i64> %2, <i64 36, i64 36>
27423 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_36_v2i64 - per-lane test "popcount(x) > 36" on a <2 x i64> vector.
; The IR at the bottom calls @llvm.ctpop.v2i64, compares each lane with
; `icmp ugt` against the splat constant 36, and sign-extends the <2 x i1>
; mask to <2 x i64> (all-ones where the compare holds, zero otherwise).
; The comment lines in between are the expected llc output for each FileCheck
; prefix; per the file header they are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
; The pre-AVX prefixes expect a dword compare (pcmpgtd) after a pshufd
; [0,0,2,2] of the psadbw result; the AVX and later prefixes expect vpcmpgtq.
27427 define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) {
27428 ; SSE2-LABEL: ugt_36_v2i64:
27430 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27431 ; SSE2-NEXT: psrlw $1, %xmm1
27432 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27433 ; SSE2-NEXT: psubb %xmm1, %xmm0
27434 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27435 ; SSE2-NEXT: movdqa %xmm0, %xmm2
27436 ; SSE2-NEXT: pand %xmm1, %xmm2
27437 ; SSE2-NEXT: psrlw $2, %xmm0
27438 ; SSE2-NEXT: pand %xmm1, %xmm0
27439 ; SSE2-NEXT: paddb %xmm2, %xmm0
27440 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27441 ; SSE2-NEXT: psrlw $4, %xmm1
27442 ; SSE2-NEXT: paddb %xmm0, %xmm1
27443 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27444 ; SSE2-NEXT: pxor %xmm0, %xmm0
27445 ; SSE2-NEXT: psadbw %xmm1, %xmm0
27446 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
27447 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
27450 ; SSE3-LABEL: ugt_36_v2i64:
27452 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27453 ; SSE3-NEXT: psrlw $1, %xmm1
27454 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27455 ; SSE3-NEXT: psubb %xmm1, %xmm0
27456 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27457 ; SSE3-NEXT: movdqa %xmm0, %xmm2
27458 ; SSE3-NEXT: pand %xmm1, %xmm2
27459 ; SSE3-NEXT: psrlw $2, %xmm0
27460 ; SSE3-NEXT: pand %xmm1, %xmm0
27461 ; SSE3-NEXT: paddb %xmm2, %xmm0
27462 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27463 ; SSE3-NEXT: psrlw $4, %xmm1
27464 ; SSE3-NEXT: paddb %xmm0, %xmm1
27465 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27466 ; SSE3-NEXT: pxor %xmm0, %xmm0
27467 ; SSE3-NEXT: psadbw %xmm1, %xmm0
27468 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
27469 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
27472 ; SSSE3-LABEL: ugt_36_v2i64:
27474 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27475 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
27476 ; SSSE3-NEXT: pand %xmm1, %xmm2
27477 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27478 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
27479 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
27480 ; SSSE3-NEXT: psrlw $4, %xmm0
27481 ; SSSE3-NEXT: pand %xmm1, %xmm0
27482 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
27483 ; SSSE3-NEXT: paddb %xmm4, %xmm3
27484 ; SSSE3-NEXT: pxor %xmm0, %xmm0
27485 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
27486 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
27487 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
27490 ; SSE41-LABEL: ugt_36_v2i64:
27492 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27493 ; SSE41-NEXT: movdqa %xmm0, %xmm2
27494 ; SSE41-NEXT: pand %xmm1, %xmm2
27495 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27496 ; SSE41-NEXT: movdqa %xmm3, %xmm4
27497 ; SSE41-NEXT: pshufb %xmm2, %xmm4
27498 ; SSE41-NEXT: psrlw $4, %xmm0
27499 ; SSE41-NEXT: pand %xmm1, %xmm0
27500 ; SSE41-NEXT: pshufb %xmm0, %xmm3
27501 ; SSE41-NEXT: paddb %xmm4, %xmm3
27502 ; SSE41-NEXT: pxor %xmm0, %xmm0
27503 ; SSE41-NEXT: psadbw %xmm3, %xmm0
27504 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
27505 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
27508 ; AVX1-LABEL: ugt_36_v2i64:
27510 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27511 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
27512 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27513 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27514 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
27515 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
27516 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27517 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27518 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
27519 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27520 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27523 ; AVX2-LABEL: ugt_36_v2i64:
27525 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27526 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
27527 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27528 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27529 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
27530 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
27531 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27532 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27533 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
27534 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27535 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27538 ; AVX512VPOPCNTDQ-LABEL: ugt_36_v2i64:
27539 ; AVX512VPOPCNTDQ: # %bb.0:
27540 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27541 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
27542 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27543 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
27544 ; AVX512VPOPCNTDQ-NEXT: retq
27546 ; AVX512VPOPCNTDQVL-LABEL: ugt_36_v2i64:
27547 ; AVX512VPOPCNTDQVL: # %bb.0:
27548 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
27549 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36]
27550 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
27551 ; AVX512VPOPCNTDQVL-NEXT: retq
27553 ; BITALG_NOVLX-LABEL: ugt_36_v2i64:
27554 ; BITALG_NOVLX: # %bb.0:
27555 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27556 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
27557 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
27558 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27559 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27560 ; BITALG_NOVLX-NEXT: vzeroupper
27561 ; BITALG_NOVLX-NEXT: retq
27563 ; BITALG-LABEL: ugt_36_v2i64:
27565 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
27566 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
27567 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27568 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [36,36]
27569 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
27570 ; BITALG-NEXT: retq
; IR under test - ctpop, unsigned greater-than 36 per lane, then sign-extend.
27571 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
27572 %3 = icmp ugt <2 x i64> %2, <i64 36, i64 36>
27573 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_37_v2i64 - per-lane test "popcount(x) < 37" on a <2 x i64> vector.
; The IR at the bottom calls @llvm.ctpop.v2i64, compares each lane with
; `icmp ult` against the splat constant 37, and sign-extends the <2 x i1>
; mask to <2 x i64> (all-ones where the compare holds, zero otherwise).
; The comment lines in between are the expected llc output for each FileCheck
; prefix; per the file header they are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
; The pre-AVX prefixes expect a dword compare (pcmpgtd) after a pshufd
; [0,0,2,2] of the psadbw result; the AVX and later prefixes expect vpcmpgtq.
27577 define <2 x i64> @ult_37_v2i64(<2 x i64> %0) {
27578 ; SSE2-LABEL: ult_37_v2i64:
27580 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27581 ; SSE2-NEXT: psrlw $1, %xmm1
27582 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27583 ; SSE2-NEXT: psubb %xmm1, %xmm0
27584 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27585 ; SSE2-NEXT: movdqa %xmm0, %xmm2
27586 ; SSE2-NEXT: pand %xmm1, %xmm2
27587 ; SSE2-NEXT: psrlw $2, %xmm0
27588 ; SSE2-NEXT: pand %xmm1, %xmm0
27589 ; SSE2-NEXT: paddb %xmm2, %xmm0
27590 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27591 ; SSE2-NEXT: psrlw $4, %xmm1
27592 ; SSE2-NEXT: paddb %xmm0, %xmm1
27593 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27594 ; SSE2-NEXT: pxor %xmm0, %xmm0
27595 ; SSE2-NEXT: psadbw %xmm1, %xmm0
27596 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27597 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [37,37,37,37]
27598 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
27601 ; SSE3-LABEL: ult_37_v2i64:
27603 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27604 ; SSE3-NEXT: psrlw $1, %xmm1
27605 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27606 ; SSE3-NEXT: psubb %xmm1, %xmm0
27607 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27608 ; SSE3-NEXT: movdqa %xmm0, %xmm2
27609 ; SSE3-NEXT: pand %xmm1, %xmm2
27610 ; SSE3-NEXT: psrlw $2, %xmm0
27611 ; SSE3-NEXT: pand %xmm1, %xmm0
27612 ; SSE3-NEXT: paddb %xmm2, %xmm0
27613 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27614 ; SSE3-NEXT: psrlw $4, %xmm1
27615 ; SSE3-NEXT: paddb %xmm0, %xmm1
27616 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27617 ; SSE3-NEXT: pxor %xmm0, %xmm0
27618 ; SSE3-NEXT: psadbw %xmm1, %xmm0
27619 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27620 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [37,37,37,37]
27621 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
27624 ; SSSE3-LABEL: ult_37_v2i64:
27626 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27627 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
27628 ; SSSE3-NEXT: pand %xmm1, %xmm2
27629 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27630 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
27631 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
27632 ; SSSE3-NEXT: psrlw $4, %xmm0
27633 ; SSSE3-NEXT: pand %xmm1, %xmm0
27634 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
27635 ; SSSE3-NEXT: paddb %xmm4, %xmm3
27636 ; SSSE3-NEXT: pxor %xmm0, %xmm0
27637 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
27638 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27639 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [37,37,37,37]
27640 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
27643 ; SSE41-LABEL: ult_37_v2i64:
27645 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27646 ; SSE41-NEXT: movdqa %xmm0, %xmm2
27647 ; SSE41-NEXT: pand %xmm1, %xmm2
27648 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27649 ; SSE41-NEXT: movdqa %xmm3, %xmm4
27650 ; SSE41-NEXT: pshufb %xmm2, %xmm4
27651 ; SSE41-NEXT: psrlw $4, %xmm0
27652 ; SSE41-NEXT: pand %xmm1, %xmm0
27653 ; SSE41-NEXT: pshufb %xmm0, %xmm3
27654 ; SSE41-NEXT: paddb %xmm4, %xmm3
27655 ; SSE41-NEXT: pxor %xmm0, %xmm0
27656 ; SSE41-NEXT: psadbw %xmm3, %xmm0
27657 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27658 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [37,37,37,37]
27659 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
27662 ; AVX1-LABEL: ult_37_v2i64:
27664 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27665 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
27666 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27667 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27668 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
27669 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
27670 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27671 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27672 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
27673 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27674 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [37,37]
27675 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27678 ; AVX2-LABEL: ult_37_v2i64:
27680 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27681 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
27682 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27683 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27684 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
27685 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
27686 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27687 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27688 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
27689 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27690 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [37,37]
27691 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27694 ; AVX512VPOPCNTDQ-LABEL: ult_37_v2i64:
27695 ; AVX512VPOPCNTDQ: # %bb.0:
27696 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27697 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
27698 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [37,37]
27699 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27700 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
27701 ; AVX512VPOPCNTDQ-NEXT: retq
27703 ; AVX512VPOPCNTDQVL-LABEL: ult_37_v2i64:
27704 ; AVX512VPOPCNTDQVL: # %bb.0:
27705 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
27706 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37]
27707 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27708 ; AVX512VPOPCNTDQVL-NEXT: retq
27710 ; BITALG_NOVLX-LABEL: ult_37_v2i64:
27711 ; BITALG_NOVLX: # %bb.0:
27712 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27713 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
27714 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
27715 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27716 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [37,37]
27717 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27718 ; BITALG_NOVLX-NEXT: vzeroupper
27719 ; BITALG_NOVLX-NEXT: retq
27721 ; BITALG-LABEL: ult_37_v2i64:
27723 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
27724 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
27725 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27726 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37]
27727 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27728 ; BITALG-NEXT: retq
; IR under test - ctpop, unsigned less-than 37 per lane, then sign-extend.
27729 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
27730 %3 = icmp ult <2 x i64> %2, <i64 37, i64 37>
27731 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_37_v2i64: codegen test for a per-lane unsigned "popcount > 37" on <2 x i64>,
; with the i1 compare result sign-extended to a <2 x i64> mask.
; The assertions in this function are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
; As the checks show, the legacy-SSE runs compare 32-bit lanes (pcmpgtd after a pshufd
; that duplicates the low dword of each psadbw sum) while the VEX/EVEX runs use vpcmpgtq.
27735 define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) {
27736 ; SSE2-LABEL: ugt_37_v2i64:
27738 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27739 ; SSE2-NEXT: psrlw $1, %xmm1
27740 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27741 ; SSE2-NEXT: psubb %xmm1, %xmm0
27742 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27743 ; SSE2-NEXT: movdqa %xmm0, %xmm2
27744 ; SSE2-NEXT: pand %xmm1, %xmm2
27745 ; SSE2-NEXT: psrlw $2, %xmm0
27746 ; SSE2-NEXT: pand %xmm1, %xmm0
27747 ; SSE2-NEXT: paddb %xmm2, %xmm0
27748 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27749 ; SSE2-NEXT: psrlw $4, %xmm1
27750 ; SSE2-NEXT: paddb %xmm0, %xmm1
27751 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27752 ; SSE2-NEXT: pxor %xmm0, %xmm0
27753 ; SSE2-NEXT: psadbw %xmm1, %xmm0
27754 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
27755 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
27758 ; SSE3-LABEL: ugt_37_v2i64:
27760 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27761 ; SSE3-NEXT: psrlw $1, %xmm1
27762 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27763 ; SSE3-NEXT: psubb %xmm1, %xmm0
27764 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27765 ; SSE3-NEXT: movdqa %xmm0, %xmm2
27766 ; SSE3-NEXT: pand %xmm1, %xmm2
27767 ; SSE3-NEXT: psrlw $2, %xmm0
27768 ; SSE3-NEXT: pand %xmm1, %xmm0
27769 ; SSE3-NEXT: paddb %xmm2, %xmm0
27770 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27771 ; SSE3-NEXT: psrlw $4, %xmm1
27772 ; SSE3-NEXT: paddb %xmm0, %xmm1
27773 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27774 ; SSE3-NEXT: pxor %xmm0, %xmm0
27775 ; SSE3-NEXT: psadbw %xmm1, %xmm0
27776 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
27777 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
27780 ; SSSE3-LABEL: ugt_37_v2i64:
27782 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27783 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
27784 ; SSSE3-NEXT: pand %xmm1, %xmm2
27785 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27786 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
27787 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
27788 ; SSSE3-NEXT: psrlw $4, %xmm0
27789 ; SSSE3-NEXT: pand %xmm1, %xmm0
27790 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
27791 ; SSSE3-NEXT: paddb %xmm4, %xmm3
27792 ; SSSE3-NEXT: pxor %xmm0, %xmm0
27793 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
27794 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
27795 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
27798 ; SSE41-LABEL: ugt_37_v2i64:
27800 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27801 ; SSE41-NEXT: movdqa %xmm0, %xmm2
27802 ; SSE41-NEXT: pand %xmm1, %xmm2
27803 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27804 ; SSE41-NEXT: movdqa %xmm3, %xmm4
27805 ; SSE41-NEXT: pshufb %xmm2, %xmm4
27806 ; SSE41-NEXT: psrlw $4, %xmm0
27807 ; SSE41-NEXT: pand %xmm1, %xmm0
27808 ; SSE41-NEXT: pshufb %xmm0, %xmm3
27809 ; SSE41-NEXT: paddb %xmm4, %xmm3
27810 ; SSE41-NEXT: pxor %xmm0, %xmm0
27811 ; SSE41-NEXT: psadbw %xmm3, %xmm0
27812 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
27813 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
27816 ; AVX1-LABEL: ugt_37_v2i64:
27818 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27819 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
27820 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27821 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27822 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
27823 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
27824 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27825 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27826 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
27827 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27828 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27831 ; AVX2-LABEL: ugt_37_v2i64:
27833 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27834 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
27835 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27836 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27837 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
27838 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
27839 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27840 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27841 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
27842 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27843 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27846 ; AVX512VPOPCNTDQ-LABEL: ugt_37_v2i64:
27847 ; AVX512VPOPCNTDQ: # %bb.0:
27848 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27849 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
27850 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27851 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
27852 ; AVX512VPOPCNTDQ-NEXT: retq
27854 ; AVX512VPOPCNTDQVL-LABEL: ugt_37_v2i64:
27855 ; AVX512VPOPCNTDQVL: # %bb.0:
27856 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
27857 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37]
27858 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
27859 ; AVX512VPOPCNTDQVL-NEXT: retq
27861 ; BITALG_NOVLX-LABEL: ugt_37_v2i64:
27862 ; BITALG_NOVLX: # %bb.0:
27863 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
27864 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
27865 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
27866 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27867 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
27868 ; BITALG_NOVLX-NEXT: vzeroupper
27869 ; BITALG_NOVLX-NEXT: retq
27871 ; BITALG-LABEL: ugt_37_v2i64:
27873 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
27874 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
27875 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27876 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [37,37]
27877 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
27878 ; BITALG-NEXT: retq
; IR under test: ctpop of each i64 lane, icmp ugt against splat 37, then sext of the
; <2 x i1> result to <2 x i64>.
27879 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
27880 %3 = icmp ugt <2 x i64> %2, <i64 37, i64 37>
27881 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_38_v2i64: codegen test for a per-lane unsigned "popcount < 38" on <2 x i64>,
; with the i1 compare result sign-extended to a <2 x i64> mask.
; The assertions in this function are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
; As the checks show, the less-than form materializes the splat of 38 in a register and
; uses it as the greater-than side of pcmpgtd / vpcmpgtq (i.e. 38 > popcount).
27885 define <2 x i64> @ult_38_v2i64(<2 x i64> %0) {
27886 ; SSE2-LABEL: ult_38_v2i64:
27888 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27889 ; SSE2-NEXT: psrlw $1, %xmm1
27890 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27891 ; SSE2-NEXT: psubb %xmm1, %xmm0
27892 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27893 ; SSE2-NEXT: movdqa %xmm0, %xmm2
27894 ; SSE2-NEXT: pand %xmm1, %xmm2
27895 ; SSE2-NEXT: psrlw $2, %xmm0
27896 ; SSE2-NEXT: pand %xmm1, %xmm0
27897 ; SSE2-NEXT: paddb %xmm2, %xmm0
27898 ; SSE2-NEXT: movdqa %xmm0, %xmm1
27899 ; SSE2-NEXT: psrlw $4, %xmm1
27900 ; SSE2-NEXT: paddb %xmm0, %xmm1
27901 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27902 ; SSE2-NEXT: pxor %xmm0, %xmm0
27903 ; SSE2-NEXT: psadbw %xmm1, %xmm0
27904 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27905 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [38,38,38,38]
27906 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
27909 ; SSE3-LABEL: ult_38_v2i64:
27911 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27912 ; SSE3-NEXT: psrlw $1, %xmm1
27913 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27914 ; SSE3-NEXT: psubb %xmm1, %xmm0
27915 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
27916 ; SSE3-NEXT: movdqa %xmm0, %xmm2
27917 ; SSE3-NEXT: pand %xmm1, %xmm2
27918 ; SSE3-NEXT: psrlw $2, %xmm0
27919 ; SSE3-NEXT: pand %xmm1, %xmm0
27920 ; SSE3-NEXT: paddb %xmm2, %xmm0
27921 ; SSE3-NEXT: movdqa %xmm0, %xmm1
27922 ; SSE3-NEXT: psrlw $4, %xmm1
27923 ; SSE3-NEXT: paddb %xmm0, %xmm1
27924 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
27925 ; SSE3-NEXT: pxor %xmm0, %xmm0
27926 ; SSE3-NEXT: psadbw %xmm1, %xmm0
27927 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27928 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [38,38,38,38]
27929 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
27932 ; SSSE3-LABEL: ult_38_v2i64:
27934 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27935 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
27936 ; SSSE3-NEXT: pand %xmm1, %xmm2
27937 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27938 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
27939 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
27940 ; SSSE3-NEXT: psrlw $4, %xmm0
27941 ; SSSE3-NEXT: pand %xmm1, %xmm0
27942 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
27943 ; SSSE3-NEXT: paddb %xmm4, %xmm3
27944 ; SSSE3-NEXT: pxor %xmm0, %xmm0
27945 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
27946 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27947 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [38,38,38,38]
27948 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
27951 ; SSE41-LABEL: ult_38_v2i64:
27953 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27954 ; SSE41-NEXT: movdqa %xmm0, %xmm2
27955 ; SSE41-NEXT: pand %xmm1, %xmm2
27956 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27957 ; SSE41-NEXT: movdqa %xmm3, %xmm4
27958 ; SSE41-NEXT: pshufb %xmm2, %xmm4
27959 ; SSE41-NEXT: psrlw $4, %xmm0
27960 ; SSE41-NEXT: pand %xmm1, %xmm0
27961 ; SSE41-NEXT: pshufb %xmm0, %xmm3
27962 ; SSE41-NEXT: paddb %xmm4, %xmm3
27963 ; SSE41-NEXT: pxor %xmm0, %xmm0
27964 ; SSE41-NEXT: psadbw %xmm3, %xmm0
27965 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
27966 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [38,38,38,38]
27967 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
27970 ; AVX1-LABEL: ult_38_v2i64:
27972 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27973 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
27974 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27975 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27976 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
27977 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
27978 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27979 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27980 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
27981 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27982 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [38,38]
27983 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
27986 ; AVX2-LABEL: ult_38_v2i64:
27988 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
27989 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
27990 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
27991 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
27992 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
27993 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
27994 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
27995 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
27996 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
27997 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
27998 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [38,38]
27999 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28002 ; AVX512VPOPCNTDQ-LABEL: ult_38_v2i64:
28003 ; AVX512VPOPCNTDQ: # %bb.0:
28004 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28005 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
28006 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [38,38]
28007 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28008 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
28009 ; AVX512VPOPCNTDQ-NEXT: retq
28011 ; AVX512VPOPCNTDQVL-LABEL: ult_38_v2i64:
28012 ; AVX512VPOPCNTDQVL: # %bb.0:
28013 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
28014 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38]
28015 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28016 ; AVX512VPOPCNTDQVL-NEXT: retq
28018 ; BITALG_NOVLX-LABEL: ult_38_v2i64:
28019 ; BITALG_NOVLX: # %bb.0:
28020 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28021 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
28022 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
28023 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28024 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [38,38]
28025 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28026 ; BITALG_NOVLX-NEXT: vzeroupper
28027 ; BITALG_NOVLX-NEXT: retq
28029 ; BITALG-LABEL: ult_38_v2i64:
28031 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
28032 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
28033 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28034 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38]
28035 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28036 ; BITALG-NEXT: retq
; IR under test: ctpop of each i64 lane, icmp ult against splat 38, then sext of the
; <2 x i1> result to <2 x i64>.
28037 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28038 %3 = icmp ult <2 x i64> %2, <i64 38, i64 38>
28039 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_38_v2i64: codegen test for a per-lane unsigned "popcount > 38" on <2 x i64>,
; with the i1 compare result sign-extended to a <2 x i64> mask.
; The assertions in this function are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
; As the checks show, the legacy-SSE runs compare 32-bit lanes (pcmpgtd after a pshufd
; that duplicates the low dword of each psadbw sum) while the VEX/EVEX runs use vpcmpgtq.
28043 define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) {
28044 ; SSE2-LABEL: ugt_38_v2i64:
28046 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28047 ; SSE2-NEXT: psrlw $1, %xmm1
28048 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28049 ; SSE2-NEXT: psubb %xmm1, %xmm0
28050 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28051 ; SSE2-NEXT: movdqa %xmm0, %xmm2
28052 ; SSE2-NEXT: pand %xmm1, %xmm2
28053 ; SSE2-NEXT: psrlw $2, %xmm0
28054 ; SSE2-NEXT: pand %xmm1, %xmm0
28055 ; SSE2-NEXT: paddb %xmm2, %xmm0
28056 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28057 ; SSE2-NEXT: psrlw $4, %xmm1
28058 ; SSE2-NEXT: paddb %xmm0, %xmm1
28059 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28060 ; SSE2-NEXT: pxor %xmm0, %xmm0
28061 ; SSE2-NEXT: psadbw %xmm1, %xmm0
28062 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
28063 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28066 ; SSE3-LABEL: ugt_38_v2i64:
28068 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28069 ; SSE3-NEXT: psrlw $1, %xmm1
28070 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28071 ; SSE3-NEXT: psubb %xmm1, %xmm0
28072 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28073 ; SSE3-NEXT: movdqa %xmm0, %xmm2
28074 ; SSE3-NEXT: pand %xmm1, %xmm2
28075 ; SSE3-NEXT: psrlw $2, %xmm0
28076 ; SSE3-NEXT: pand %xmm1, %xmm0
28077 ; SSE3-NEXT: paddb %xmm2, %xmm0
28078 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28079 ; SSE3-NEXT: psrlw $4, %xmm1
28080 ; SSE3-NEXT: paddb %xmm0, %xmm1
28081 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28082 ; SSE3-NEXT: pxor %xmm0, %xmm0
28083 ; SSE3-NEXT: psadbw %xmm1, %xmm0
28084 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
28085 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28088 ; SSSE3-LABEL: ugt_38_v2i64:
28090 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28091 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
28092 ; SSSE3-NEXT: pand %xmm1, %xmm2
28093 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28094 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
28095 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
28096 ; SSSE3-NEXT: psrlw $4, %xmm0
28097 ; SSSE3-NEXT: pand %xmm1, %xmm0
28098 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
28099 ; SSSE3-NEXT: paddb %xmm4, %xmm3
28100 ; SSSE3-NEXT: pxor %xmm0, %xmm0
28101 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
28102 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
28103 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28106 ; SSE41-LABEL: ugt_38_v2i64:
28108 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28109 ; SSE41-NEXT: movdqa %xmm0, %xmm2
28110 ; SSE41-NEXT: pand %xmm1, %xmm2
28111 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28112 ; SSE41-NEXT: movdqa %xmm3, %xmm4
28113 ; SSE41-NEXT: pshufb %xmm2, %xmm4
28114 ; SSE41-NEXT: psrlw $4, %xmm0
28115 ; SSE41-NEXT: pand %xmm1, %xmm0
28116 ; SSE41-NEXT: pshufb %xmm0, %xmm3
28117 ; SSE41-NEXT: paddb %xmm4, %xmm3
28118 ; SSE41-NEXT: pxor %xmm0, %xmm0
28119 ; SSE41-NEXT: psadbw %xmm3, %xmm0
28120 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
28121 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28124 ; AVX1-LABEL: ugt_38_v2i64:
28126 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28127 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
28128 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28129 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28130 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
28131 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
28132 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28133 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28134 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
28135 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28136 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28139 ; AVX2-LABEL: ugt_38_v2i64:
28141 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28142 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
28143 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28144 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28145 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
28146 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
28147 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28148 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28149 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
28150 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28151 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28154 ; AVX512VPOPCNTDQ-LABEL: ugt_38_v2i64:
28155 ; AVX512VPOPCNTDQ: # %bb.0:
28156 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28157 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
28158 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28159 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
28160 ; AVX512VPOPCNTDQ-NEXT: retq
28162 ; AVX512VPOPCNTDQVL-LABEL: ugt_38_v2i64:
28163 ; AVX512VPOPCNTDQVL: # %bb.0:
28164 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
28165 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38]
28166 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
28167 ; AVX512VPOPCNTDQVL-NEXT: retq
28169 ; BITALG_NOVLX-LABEL: ugt_38_v2i64:
28170 ; BITALG_NOVLX: # %bb.0:
28171 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28172 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
28173 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
28174 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28175 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28176 ; BITALG_NOVLX-NEXT: vzeroupper
28177 ; BITALG_NOVLX-NEXT: retq
28179 ; BITALG-LABEL: ugt_38_v2i64:
28181 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
28182 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
28183 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28184 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [38,38]
28185 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
28186 ; BITALG-NEXT: retq
; IR under test: ctpop of each i64 lane, icmp ugt against splat 38, then sext of the
; <2 x i1> result to <2 x i64>.
28187 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28188 %3 = icmp ugt <2 x i64> %2, <i64 38, i64 38>
28189 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_39_v2i64: codegen test for a per-lane unsigned "popcount < 39" on <2 x i64>,
; with the i1 compare result sign-extended to a <2 x i64> mask.
; The assertions in this function are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
; As the checks show, the less-than form materializes the splat of 39 in a register and
; uses it as the greater-than side of pcmpgtd / vpcmpgtq (i.e. 39 > popcount).
28193 define <2 x i64> @ult_39_v2i64(<2 x i64> %0) {
28194 ; SSE2-LABEL: ult_39_v2i64:
28196 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28197 ; SSE2-NEXT: psrlw $1, %xmm1
28198 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28199 ; SSE2-NEXT: psubb %xmm1, %xmm0
28200 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28201 ; SSE2-NEXT: movdqa %xmm0, %xmm2
28202 ; SSE2-NEXT: pand %xmm1, %xmm2
28203 ; SSE2-NEXT: psrlw $2, %xmm0
28204 ; SSE2-NEXT: pand %xmm1, %xmm0
28205 ; SSE2-NEXT: paddb %xmm2, %xmm0
28206 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28207 ; SSE2-NEXT: psrlw $4, %xmm1
28208 ; SSE2-NEXT: paddb %xmm0, %xmm1
28209 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28210 ; SSE2-NEXT: pxor %xmm0, %xmm0
28211 ; SSE2-NEXT: psadbw %xmm1, %xmm0
28212 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28213 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [39,39,39,39]
28214 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
28217 ; SSE3-LABEL: ult_39_v2i64:
28219 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28220 ; SSE3-NEXT: psrlw $1, %xmm1
28221 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28222 ; SSE3-NEXT: psubb %xmm1, %xmm0
28223 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28224 ; SSE3-NEXT: movdqa %xmm0, %xmm2
28225 ; SSE3-NEXT: pand %xmm1, %xmm2
28226 ; SSE3-NEXT: psrlw $2, %xmm0
28227 ; SSE3-NEXT: pand %xmm1, %xmm0
28228 ; SSE3-NEXT: paddb %xmm2, %xmm0
28229 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28230 ; SSE3-NEXT: psrlw $4, %xmm1
28231 ; SSE3-NEXT: paddb %xmm0, %xmm1
28232 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28233 ; SSE3-NEXT: pxor %xmm0, %xmm0
28234 ; SSE3-NEXT: psadbw %xmm1, %xmm0
28235 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28236 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [39,39,39,39]
28237 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
28240 ; SSSE3-LABEL: ult_39_v2i64:
28242 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28243 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
28244 ; SSSE3-NEXT: pand %xmm1, %xmm2
28245 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28246 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
28247 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
28248 ; SSSE3-NEXT: psrlw $4, %xmm0
28249 ; SSSE3-NEXT: pand %xmm1, %xmm0
28250 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
28251 ; SSSE3-NEXT: paddb %xmm4, %xmm3
28252 ; SSSE3-NEXT: pxor %xmm0, %xmm0
28253 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
28254 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28255 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [39,39,39,39]
28256 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
28259 ; SSE41-LABEL: ult_39_v2i64:
28261 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28262 ; SSE41-NEXT: movdqa %xmm0, %xmm2
28263 ; SSE41-NEXT: pand %xmm1, %xmm2
28264 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28265 ; SSE41-NEXT: movdqa %xmm3, %xmm4
28266 ; SSE41-NEXT: pshufb %xmm2, %xmm4
28267 ; SSE41-NEXT: psrlw $4, %xmm0
28268 ; SSE41-NEXT: pand %xmm1, %xmm0
28269 ; SSE41-NEXT: pshufb %xmm0, %xmm3
28270 ; SSE41-NEXT: paddb %xmm4, %xmm3
28271 ; SSE41-NEXT: pxor %xmm0, %xmm0
28272 ; SSE41-NEXT: psadbw %xmm3, %xmm0
28273 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28274 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [39,39,39,39]
28275 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
28278 ; AVX1-LABEL: ult_39_v2i64:
28280 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28281 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
28282 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28283 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28284 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
28285 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
28286 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28287 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28288 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
28289 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28290 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [39,39]
28291 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28294 ; AVX2-LABEL: ult_39_v2i64:
28296 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28297 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
28298 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28299 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28300 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
28301 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
28302 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28303 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28304 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
28305 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28306 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [39,39]
28307 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28310 ; AVX512VPOPCNTDQ-LABEL: ult_39_v2i64:
28311 ; AVX512VPOPCNTDQ: # %bb.0:
28312 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28313 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
28314 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [39,39]
28315 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28316 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
28317 ; AVX512VPOPCNTDQ-NEXT: retq
28319 ; AVX512VPOPCNTDQVL-LABEL: ult_39_v2i64:
28320 ; AVX512VPOPCNTDQVL: # %bb.0:
28321 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
28322 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39]
28323 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28324 ; AVX512VPOPCNTDQVL-NEXT: retq
28326 ; BITALG_NOVLX-LABEL: ult_39_v2i64:
28327 ; BITALG_NOVLX: # %bb.0:
28328 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28329 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
28330 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
28331 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28332 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [39,39]
28333 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28334 ; BITALG_NOVLX-NEXT: vzeroupper
28335 ; BITALG_NOVLX-NEXT: retq
28337 ; BITALG-LABEL: ult_39_v2i64:
28339 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
28340 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
28341 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28342 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39]
28343 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28344 ; BITALG-NEXT: retq
; IR under test: ctpop of each i64 lane, icmp ult against splat 39, then sext of the
; <2 x i1> result to <2 x i64>.
28345 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28346 %3 = icmp ult <2 x i64> %2, <i64 39, i64 39>
28347 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_39_v2i64: codegen test for a per-lane unsigned "popcount > 39" on <2 x i64>,
; with the i1 compare result sign-extended to a <2 x i64> mask.
; The assertions in this function are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
; As the checks show, the legacy-SSE runs compare 32-bit lanes (pcmpgtd after a pshufd
; that duplicates the low dword of each psadbw sum) while the VEX/EVEX runs use vpcmpgtq.
28351 define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) {
28352 ; SSE2-LABEL: ugt_39_v2i64:
28354 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28355 ; SSE2-NEXT: psrlw $1, %xmm1
28356 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28357 ; SSE2-NEXT: psubb %xmm1, %xmm0
28358 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28359 ; SSE2-NEXT: movdqa %xmm0, %xmm2
28360 ; SSE2-NEXT: pand %xmm1, %xmm2
28361 ; SSE2-NEXT: psrlw $2, %xmm0
28362 ; SSE2-NEXT: pand %xmm1, %xmm0
28363 ; SSE2-NEXT: paddb %xmm2, %xmm0
28364 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28365 ; SSE2-NEXT: psrlw $4, %xmm1
28366 ; SSE2-NEXT: paddb %xmm0, %xmm1
28367 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28368 ; SSE2-NEXT: pxor %xmm0, %xmm0
28369 ; SSE2-NEXT: psadbw %xmm1, %xmm0
28370 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
28371 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28374 ; SSE3-LABEL: ugt_39_v2i64:
28376 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28377 ; SSE3-NEXT: psrlw $1, %xmm1
28378 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28379 ; SSE3-NEXT: psubb %xmm1, %xmm0
28380 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28381 ; SSE3-NEXT: movdqa %xmm0, %xmm2
28382 ; SSE3-NEXT: pand %xmm1, %xmm2
28383 ; SSE3-NEXT: psrlw $2, %xmm0
28384 ; SSE3-NEXT: pand %xmm1, %xmm0
28385 ; SSE3-NEXT: paddb %xmm2, %xmm0
28386 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28387 ; SSE3-NEXT: psrlw $4, %xmm1
28388 ; SSE3-NEXT: paddb %xmm0, %xmm1
28389 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28390 ; SSE3-NEXT: pxor %xmm0, %xmm0
28391 ; SSE3-NEXT: psadbw %xmm1, %xmm0
28392 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
28393 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28396 ; SSSE3-LABEL: ugt_39_v2i64:
28398 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28399 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
28400 ; SSSE3-NEXT: pand %xmm1, %xmm2
28401 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28402 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
28403 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
28404 ; SSSE3-NEXT: psrlw $4, %xmm0
28405 ; SSSE3-NEXT: pand %xmm1, %xmm0
28406 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
28407 ; SSSE3-NEXT: paddb %xmm4, %xmm3
28408 ; SSSE3-NEXT: pxor %xmm0, %xmm0
28409 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
28410 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
28411 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28414 ; SSE41-LABEL: ugt_39_v2i64:
28416 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28417 ; SSE41-NEXT: movdqa %xmm0, %xmm2
28418 ; SSE41-NEXT: pand %xmm1, %xmm2
28419 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28420 ; SSE41-NEXT: movdqa %xmm3, %xmm4
28421 ; SSE41-NEXT: pshufb %xmm2, %xmm4
28422 ; SSE41-NEXT: psrlw $4, %xmm0
28423 ; SSE41-NEXT: pand %xmm1, %xmm0
28424 ; SSE41-NEXT: pshufb %xmm0, %xmm3
28425 ; SSE41-NEXT: paddb %xmm4, %xmm3
28426 ; SSE41-NEXT: pxor %xmm0, %xmm0
28427 ; SSE41-NEXT: psadbw %xmm3, %xmm0
28428 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
28429 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28432 ; AVX1-LABEL: ugt_39_v2i64:
28434 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28435 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
28436 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28437 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28438 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
28439 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
28440 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28441 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28442 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
28443 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28444 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28447 ; AVX2-LABEL: ugt_39_v2i64:
28449 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28450 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
28451 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28452 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28453 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
28454 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
28455 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28456 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28457 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
28458 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28459 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28462 ; AVX512VPOPCNTDQ-LABEL: ugt_39_v2i64:
28463 ; AVX512VPOPCNTDQ: # %bb.0:
28464 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28465 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
28466 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28467 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
28468 ; AVX512VPOPCNTDQ-NEXT: retq
28470 ; AVX512VPOPCNTDQVL-LABEL: ugt_39_v2i64:
28471 ; AVX512VPOPCNTDQVL: # %bb.0:
28472 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
28473 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39]
28474 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
28475 ; AVX512VPOPCNTDQVL-NEXT: retq
28477 ; BITALG_NOVLX-LABEL: ugt_39_v2i64:
28478 ; BITALG_NOVLX: # %bb.0:
28479 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28480 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
28481 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
28482 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28483 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28484 ; BITALG_NOVLX-NEXT: vzeroupper
28485 ; BITALG_NOVLX-NEXT: retq
28487 ; BITALG-LABEL: ugt_39_v2i64:
28489 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
28490 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
28491 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28492 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [39,39]
28493 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
28494 ; BITALG-NEXT: retq
; IR under test: ctpop of each i64 lane, icmp ugt against splat 39, then sext of the
; <2 x i1> result to <2 x i64>.
28495 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28496 %3 = icmp ugt <2 x i64> %2, <i64 39, i64 39>
28497 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_40_v2i64 - codegen test for a <2 x i64> population count followed by an
; unsigned "less than 40" compare whose <2 x i1> result is sign-extended back
; to <2 x i64>.
;
; What the autogenerated expectations below pin down, per run configuration:
;  * SSE2 and SSE3 runs - shift/mask/add byte popcount (psrlw, pand, psubb,
;    paddb), psadbw against a zeroed register, pshufd with mask [0,0,2,2], then
;    pcmpgtd with a [40,40,40,40] constant loaded into xmm0.
;  * SSSE3 and SSE41 runs - pshufb lookups into the table
;    [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] for the low and high nibbles, followed by
;    the same psadbw / pshufd / pcmpgtd tail (SSE41 loads the constant with
;    pmovsxbd instead of movdqa).
;  * AVX1 and AVX2 runs - the VEX-encoded pshufb sequence, vpsadbw, and a 64-bit
;    vpcmpgtq against [40,40].
;  * avx512vpopcntdq runs - vpopcntq (on zmm0 without avx512vl, on xmm0 with it)
;    followed by vpcmpgtq against [40,40].
;  * avx512bitalg runs - vpopcntb (on zmm0 without avx512vl, on xmm0 with it),
;    vpsadbw against zero, then vpcmpgtq against [40,40].
;
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
28501 define <2 x i64> @ult_40_v2i64(<2 x i64> %0) {
28502 ; SSE2-LABEL: ult_40_v2i64:
28504 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28505 ; SSE2-NEXT: psrlw $1, %xmm1
28506 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28507 ; SSE2-NEXT: psubb %xmm1, %xmm0
28508 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28509 ; SSE2-NEXT: movdqa %xmm0, %xmm2
28510 ; SSE2-NEXT: pand %xmm1, %xmm2
28511 ; SSE2-NEXT: psrlw $2, %xmm0
28512 ; SSE2-NEXT: pand %xmm1, %xmm0
28513 ; SSE2-NEXT: paddb %xmm2, %xmm0
28514 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28515 ; SSE2-NEXT: psrlw $4, %xmm1
28516 ; SSE2-NEXT: paddb %xmm0, %xmm1
28517 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28518 ; SSE2-NEXT: pxor %xmm0, %xmm0
28519 ; SSE2-NEXT: psadbw %xmm1, %xmm0
28520 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28521 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [40,40,40,40]
28522 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
28525 ; SSE3-LABEL: ult_40_v2i64:
28527 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28528 ; SSE3-NEXT: psrlw $1, %xmm1
28529 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28530 ; SSE3-NEXT: psubb %xmm1, %xmm0
28531 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28532 ; SSE3-NEXT: movdqa %xmm0, %xmm2
28533 ; SSE3-NEXT: pand %xmm1, %xmm2
28534 ; SSE3-NEXT: psrlw $2, %xmm0
28535 ; SSE3-NEXT: pand %xmm1, %xmm0
28536 ; SSE3-NEXT: paddb %xmm2, %xmm0
28537 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28538 ; SSE3-NEXT: psrlw $4, %xmm1
28539 ; SSE3-NEXT: paddb %xmm0, %xmm1
28540 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28541 ; SSE3-NEXT: pxor %xmm0, %xmm0
28542 ; SSE3-NEXT: psadbw %xmm1, %xmm0
28543 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28544 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [40,40,40,40]
28545 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
28548 ; SSSE3-LABEL: ult_40_v2i64:
28550 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28551 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
28552 ; SSSE3-NEXT: pand %xmm1, %xmm2
28553 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28554 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
28555 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
28556 ; SSSE3-NEXT: psrlw $4, %xmm0
28557 ; SSSE3-NEXT: pand %xmm1, %xmm0
28558 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
28559 ; SSSE3-NEXT: paddb %xmm4, %xmm3
28560 ; SSSE3-NEXT: pxor %xmm0, %xmm0
28561 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
28562 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28563 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [40,40,40,40]
28564 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
28567 ; SSE41-LABEL: ult_40_v2i64:
28569 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28570 ; SSE41-NEXT: movdqa %xmm0, %xmm2
28571 ; SSE41-NEXT: pand %xmm1, %xmm2
28572 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28573 ; SSE41-NEXT: movdqa %xmm3, %xmm4
28574 ; SSE41-NEXT: pshufb %xmm2, %xmm4
28575 ; SSE41-NEXT: psrlw $4, %xmm0
28576 ; SSE41-NEXT: pand %xmm1, %xmm0
28577 ; SSE41-NEXT: pshufb %xmm0, %xmm3
28578 ; SSE41-NEXT: paddb %xmm4, %xmm3
28579 ; SSE41-NEXT: pxor %xmm0, %xmm0
28580 ; SSE41-NEXT: psadbw %xmm3, %xmm0
28581 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28582 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [40,40,40,40]
28583 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
28586 ; AVX1-LABEL: ult_40_v2i64:
28588 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28589 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
28590 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28591 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28592 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
28593 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
28594 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28595 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28596 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
28597 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28598 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [40,40]
28599 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28602 ; AVX2-LABEL: ult_40_v2i64:
28604 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28605 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
28606 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28607 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28608 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
28609 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
28610 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28611 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28612 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
28613 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28614 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [40,40]
28615 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28618 ; AVX512VPOPCNTDQ-LABEL: ult_40_v2i64:
28619 ; AVX512VPOPCNTDQ: # %bb.0:
28620 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28621 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
28622 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [40,40]
28623 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28624 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
28625 ; AVX512VPOPCNTDQ-NEXT: retq
28627 ; AVX512VPOPCNTDQVL-LABEL: ult_40_v2i64:
28628 ; AVX512VPOPCNTDQVL: # %bb.0:
28629 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
28630 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40]
28631 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28632 ; AVX512VPOPCNTDQVL-NEXT: retq
28634 ; BITALG_NOVLX-LABEL: ult_40_v2i64:
28635 ; BITALG_NOVLX: # %bb.0:
28636 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28637 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
28638 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
28639 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28640 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [40,40]
28641 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28642 ; BITALG_NOVLX-NEXT: vzeroupper
28643 ; BITALG_NOVLX-NEXT: retq
28645 ; BITALG-LABEL: ult_40_v2i64:
28647 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
28648 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
28649 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28650 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40]
28651 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28652 ; BITALG-NEXT: retq
; IR under test: per-lane ctpop, icmp ult against splat 40, sext of the i1 mask.
28653 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28654 %3 = icmp ult <2 x i64> %2, <i64 40, i64 40>
28655 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_40_v2i64 - codegen test for a <2 x i64> population count followed by an
; unsigned "greater than 40" compare whose <2 x i1> result is sign-extended
; back to <2 x i64>.
;
; What the autogenerated expectations below pin down, per run configuration:
;  * SSE2 and SSE3 runs - shift/mask/add byte popcount (psrlw, pand, psubb,
;    paddb), psadbw against a zeroed register, pshufd with mask [0,0,2,2], then
;    pcmpgtd with a constant-pool memory operand.
;  * SSSE3 and SSE41 runs - pshufb lookups into the table
;    [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] for the low and high nibbles, followed by
;    the same psadbw / pshufd / pcmpgtd tail.
;  * AVX1 and AVX2 runs - the VEX-encoded pshufb sequence, vpsadbw, and a 64-bit
;    vpcmpgtq with a constant-pool memory operand.
;  * avx512vpopcntdq runs - vpopcntq; without avx512vl it operates on zmm0 and
;    compares against a constant-pool operand, with avx512vl it operates on xmm0
;    and compares against a vpbroadcastq of [40,40].
;  * avx512bitalg runs - vpopcntb plus vpsadbw against zero, with the same
;    constant-pool vs. vpbroadcastq split between the non-VL and VL runs.
;
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
28659 define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) {
28660 ; SSE2-LABEL: ugt_40_v2i64:
28662 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28663 ; SSE2-NEXT: psrlw $1, %xmm1
28664 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28665 ; SSE2-NEXT: psubb %xmm1, %xmm0
28666 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28667 ; SSE2-NEXT: movdqa %xmm0, %xmm2
28668 ; SSE2-NEXT: pand %xmm1, %xmm2
28669 ; SSE2-NEXT: psrlw $2, %xmm0
28670 ; SSE2-NEXT: pand %xmm1, %xmm0
28671 ; SSE2-NEXT: paddb %xmm2, %xmm0
28672 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28673 ; SSE2-NEXT: psrlw $4, %xmm1
28674 ; SSE2-NEXT: paddb %xmm0, %xmm1
28675 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28676 ; SSE2-NEXT: pxor %xmm0, %xmm0
28677 ; SSE2-NEXT: psadbw %xmm1, %xmm0
28678 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
28679 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28682 ; SSE3-LABEL: ugt_40_v2i64:
28684 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28685 ; SSE3-NEXT: psrlw $1, %xmm1
28686 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28687 ; SSE3-NEXT: psubb %xmm1, %xmm0
28688 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28689 ; SSE3-NEXT: movdqa %xmm0, %xmm2
28690 ; SSE3-NEXT: pand %xmm1, %xmm2
28691 ; SSE3-NEXT: psrlw $2, %xmm0
28692 ; SSE3-NEXT: pand %xmm1, %xmm0
28693 ; SSE3-NEXT: paddb %xmm2, %xmm0
28694 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28695 ; SSE3-NEXT: psrlw $4, %xmm1
28696 ; SSE3-NEXT: paddb %xmm0, %xmm1
28697 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28698 ; SSE3-NEXT: pxor %xmm0, %xmm0
28699 ; SSE3-NEXT: psadbw %xmm1, %xmm0
28700 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
28701 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28704 ; SSSE3-LABEL: ugt_40_v2i64:
28706 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28707 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
28708 ; SSSE3-NEXT: pand %xmm1, %xmm2
28709 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28710 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
28711 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
28712 ; SSSE3-NEXT: psrlw $4, %xmm0
28713 ; SSSE3-NEXT: pand %xmm1, %xmm0
28714 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
28715 ; SSSE3-NEXT: paddb %xmm4, %xmm3
28716 ; SSSE3-NEXT: pxor %xmm0, %xmm0
28717 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
28718 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
28719 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28722 ; SSE41-LABEL: ugt_40_v2i64:
28724 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28725 ; SSE41-NEXT: movdqa %xmm0, %xmm2
28726 ; SSE41-NEXT: pand %xmm1, %xmm2
28727 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28728 ; SSE41-NEXT: movdqa %xmm3, %xmm4
28729 ; SSE41-NEXT: pshufb %xmm2, %xmm4
28730 ; SSE41-NEXT: psrlw $4, %xmm0
28731 ; SSE41-NEXT: pand %xmm1, %xmm0
28732 ; SSE41-NEXT: pshufb %xmm0, %xmm3
28733 ; SSE41-NEXT: paddb %xmm4, %xmm3
28734 ; SSE41-NEXT: pxor %xmm0, %xmm0
28735 ; SSE41-NEXT: psadbw %xmm3, %xmm0
28736 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
28737 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28740 ; AVX1-LABEL: ugt_40_v2i64:
28742 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28743 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
28744 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28745 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28746 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
28747 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
28748 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28749 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28750 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
28751 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28752 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28755 ; AVX2-LABEL: ugt_40_v2i64:
28757 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28758 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
28759 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28760 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28761 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
28762 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
28763 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28764 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28765 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
28766 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28767 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28770 ; AVX512VPOPCNTDQ-LABEL: ugt_40_v2i64:
28771 ; AVX512VPOPCNTDQ: # %bb.0:
28772 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28773 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
28774 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28775 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
28776 ; AVX512VPOPCNTDQ-NEXT: retq
28778 ; AVX512VPOPCNTDQVL-LABEL: ugt_40_v2i64:
28779 ; AVX512VPOPCNTDQVL: # %bb.0:
28780 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
28781 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40]
28782 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
28783 ; AVX512VPOPCNTDQVL-NEXT: retq
28785 ; BITALG_NOVLX-LABEL: ugt_40_v2i64:
28786 ; BITALG_NOVLX: # %bb.0:
28787 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28788 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
28789 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
28790 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28791 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
28792 ; BITALG_NOVLX-NEXT: vzeroupper
28793 ; BITALG_NOVLX-NEXT: retq
28795 ; BITALG-LABEL: ugt_40_v2i64:
28797 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
28798 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
28799 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28800 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [40,40]
28801 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
28802 ; BITALG-NEXT: retq
; IR under test: per-lane ctpop, icmp ugt against splat 40, sext of the i1 mask.
28803 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28804 %3 = icmp ugt <2 x i64> %2, <i64 40, i64 40>
28805 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_41_v2i64 - codegen test for a <2 x i64> population count followed by an
; unsigned "less than 41" compare whose <2 x i1> result is sign-extended back
; to <2 x i64>.
;
; What the autogenerated expectations below pin down, per run configuration:
;  * SSE2 and SSE3 runs - shift/mask/add byte popcount (psrlw, pand, psubb,
;    paddb), psadbw against a zeroed register, pshufd with mask [0,0,2,2], then
;    pcmpgtd with a [41,41,41,41] constant loaded into xmm0.
;  * SSSE3 and SSE41 runs - pshufb lookups into the table
;    [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] for the low and high nibbles, followed by
;    the same psadbw / pshufd / pcmpgtd tail (SSE41 loads the constant with
;    pmovsxbd instead of movdqa).
;  * AVX1 and AVX2 runs - the VEX-encoded pshufb sequence, vpsadbw, and a 64-bit
;    vpcmpgtq against [41,41].
;  * avx512vpopcntdq runs - vpopcntq (on zmm0 without avx512vl, on xmm0 with it)
;    followed by vpcmpgtq against [41,41].
;  * avx512bitalg runs - vpopcntb (on zmm0 without avx512vl, on xmm0 with it),
;    vpsadbw against zero, then vpcmpgtq against [41,41].
;
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
28809 define <2 x i64> @ult_41_v2i64(<2 x i64> %0) {
28810 ; SSE2-LABEL: ult_41_v2i64:
28812 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28813 ; SSE2-NEXT: psrlw $1, %xmm1
28814 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28815 ; SSE2-NEXT: psubb %xmm1, %xmm0
28816 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28817 ; SSE2-NEXT: movdqa %xmm0, %xmm2
28818 ; SSE2-NEXT: pand %xmm1, %xmm2
28819 ; SSE2-NEXT: psrlw $2, %xmm0
28820 ; SSE2-NEXT: pand %xmm1, %xmm0
28821 ; SSE2-NEXT: paddb %xmm2, %xmm0
28822 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28823 ; SSE2-NEXT: psrlw $4, %xmm1
28824 ; SSE2-NEXT: paddb %xmm0, %xmm1
28825 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28826 ; SSE2-NEXT: pxor %xmm0, %xmm0
28827 ; SSE2-NEXT: psadbw %xmm1, %xmm0
28828 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28829 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [41,41,41,41]
28830 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
28833 ; SSE3-LABEL: ult_41_v2i64:
28835 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28836 ; SSE3-NEXT: psrlw $1, %xmm1
28837 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28838 ; SSE3-NEXT: psubb %xmm1, %xmm0
28839 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28840 ; SSE3-NEXT: movdqa %xmm0, %xmm2
28841 ; SSE3-NEXT: pand %xmm1, %xmm2
28842 ; SSE3-NEXT: psrlw $2, %xmm0
28843 ; SSE3-NEXT: pand %xmm1, %xmm0
28844 ; SSE3-NEXT: paddb %xmm2, %xmm0
28845 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28846 ; SSE3-NEXT: psrlw $4, %xmm1
28847 ; SSE3-NEXT: paddb %xmm0, %xmm1
28848 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28849 ; SSE3-NEXT: pxor %xmm0, %xmm0
28850 ; SSE3-NEXT: psadbw %xmm1, %xmm0
28851 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28852 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [41,41,41,41]
28853 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
28856 ; SSSE3-LABEL: ult_41_v2i64:
28858 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28859 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
28860 ; SSSE3-NEXT: pand %xmm1, %xmm2
28861 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28862 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
28863 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
28864 ; SSSE3-NEXT: psrlw $4, %xmm0
28865 ; SSSE3-NEXT: pand %xmm1, %xmm0
28866 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
28867 ; SSSE3-NEXT: paddb %xmm4, %xmm3
28868 ; SSSE3-NEXT: pxor %xmm0, %xmm0
28869 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
28870 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28871 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [41,41,41,41]
28872 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
28875 ; SSE41-LABEL: ult_41_v2i64:
28877 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28878 ; SSE41-NEXT: movdqa %xmm0, %xmm2
28879 ; SSE41-NEXT: pand %xmm1, %xmm2
28880 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28881 ; SSE41-NEXT: movdqa %xmm3, %xmm4
28882 ; SSE41-NEXT: pshufb %xmm2, %xmm4
28883 ; SSE41-NEXT: psrlw $4, %xmm0
28884 ; SSE41-NEXT: pand %xmm1, %xmm0
28885 ; SSE41-NEXT: pshufb %xmm0, %xmm3
28886 ; SSE41-NEXT: paddb %xmm4, %xmm3
28887 ; SSE41-NEXT: pxor %xmm0, %xmm0
28888 ; SSE41-NEXT: psadbw %xmm3, %xmm0
28889 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
28890 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [41,41,41,41]
28891 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
28894 ; AVX1-LABEL: ult_41_v2i64:
28896 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28897 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
28898 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28899 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28900 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
28901 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
28902 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28903 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28904 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
28905 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28906 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [41,41]
28907 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28910 ; AVX2-LABEL: ult_41_v2i64:
28912 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
28913 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
28914 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
28915 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
28916 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
28917 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
28918 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
28919 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
28920 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
28921 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28922 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [41,41]
28923 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28926 ; AVX512VPOPCNTDQ-LABEL: ult_41_v2i64:
28927 ; AVX512VPOPCNTDQ: # %bb.0:
28928 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28929 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
28930 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [41,41]
28931 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28932 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
28933 ; AVX512VPOPCNTDQ-NEXT: retq
28935 ; AVX512VPOPCNTDQVL-LABEL: ult_41_v2i64:
28936 ; AVX512VPOPCNTDQVL: # %bb.0:
28937 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
28938 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41]
28939 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28940 ; AVX512VPOPCNTDQVL-NEXT: retq
28942 ; BITALG_NOVLX-LABEL: ult_41_v2i64:
28943 ; BITALG_NOVLX: # %bb.0:
28944 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
28945 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
28946 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
28947 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28948 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [41,41]
28949 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28950 ; BITALG_NOVLX-NEXT: vzeroupper
28951 ; BITALG_NOVLX-NEXT: retq
28953 ; BITALG-LABEL: ult_41_v2i64:
28955 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
28956 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
28957 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
28958 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41]
28959 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
28960 ; BITALG-NEXT: retq
; IR under test: per-lane ctpop, icmp ult against splat 41, sext of the i1 mask.
28961 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
28962 %3 = icmp ult <2 x i64> %2, <i64 41, i64 41>
28963 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_41_v2i64 - codegen test for a <2 x i64> population count followed by an
; unsigned "greater than 41" compare whose <2 x i1> result is sign-extended
; back to <2 x i64>.
;
; What the autogenerated expectations below pin down, per run configuration:
;  * SSE2 and SSE3 runs - shift/mask/add byte popcount (psrlw, pand, psubb,
;    paddb), psadbw against a zeroed register, pshufd with mask [0,0,2,2], then
;    pcmpgtd with a constant-pool memory operand.
;  * SSSE3 and SSE41 runs - pshufb lookups into the table
;    [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] for the low and high nibbles, followed by
;    the same psadbw / pshufd / pcmpgtd tail.
;  * AVX1 and AVX2 runs - the VEX-encoded pshufb sequence, vpsadbw, and a 64-bit
;    vpcmpgtq with a constant-pool memory operand.
;  * avx512vpopcntdq runs - vpopcntq; without avx512vl it operates on zmm0 and
;    compares against a constant-pool operand, with avx512vl it operates on xmm0
;    and compares against a vpbroadcastq of [41,41].
;  * avx512bitalg runs - vpopcntb plus vpsadbw against zero, with the same
;    constant-pool vs. vpbroadcastq split between the non-VL and VL runs.
;
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
28967 define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) {
28968 ; SSE2-LABEL: ugt_41_v2i64:
28970 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28971 ; SSE2-NEXT: psrlw $1, %xmm1
28972 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28973 ; SSE2-NEXT: psubb %xmm1, %xmm0
28974 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28975 ; SSE2-NEXT: movdqa %xmm0, %xmm2
28976 ; SSE2-NEXT: pand %xmm1, %xmm2
28977 ; SSE2-NEXT: psrlw $2, %xmm0
28978 ; SSE2-NEXT: pand %xmm1, %xmm0
28979 ; SSE2-NEXT: paddb %xmm2, %xmm0
28980 ; SSE2-NEXT: movdqa %xmm0, %xmm1
28981 ; SSE2-NEXT: psrlw $4, %xmm1
28982 ; SSE2-NEXT: paddb %xmm0, %xmm1
28983 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28984 ; SSE2-NEXT: pxor %xmm0, %xmm0
28985 ; SSE2-NEXT: psadbw %xmm1, %xmm0
28986 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
28987 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28990 ; SSE3-LABEL: ugt_41_v2i64:
28992 ; SSE3-NEXT: movdqa %xmm0, %xmm1
28993 ; SSE3-NEXT: psrlw $1, %xmm1
28994 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
28995 ; SSE3-NEXT: psubb %xmm1, %xmm0
28996 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
28997 ; SSE3-NEXT: movdqa %xmm0, %xmm2
28998 ; SSE3-NEXT: pand %xmm1, %xmm2
28999 ; SSE3-NEXT: psrlw $2, %xmm0
29000 ; SSE3-NEXT: pand %xmm1, %xmm0
29001 ; SSE3-NEXT: paddb %xmm2, %xmm0
29002 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29003 ; SSE3-NEXT: psrlw $4, %xmm1
29004 ; SSE3-NEXT: paddb %xmm0, %xmm1
29005 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29006 ; SSE3-NEXT: pxor %xmm0, %xmm0
29007 ; SSE3-NEXT: psadbw %xmm1, %xmm0
29008 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
29009 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29012 ; SSSE3-LABEL: ugt_41_v2i64:
29014 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29015 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
29016 ; SSSE3-NEXT: pand %xmm1, %xmm2
29017 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29018 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
29019 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
29020 ; SSSE3-NEXT: psrlw $4, %xmm0
29021 ; SSSE3-NEXT: pand %xmm1, %xmm0
29022 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
29023 ; SSSE3-NEXT: paddb %xmm4, %xmm3
29024 ; SSSE3-NEXT: pxor %xmm0, %xmm0
29025 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
29026 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
29027 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29030 ; SSE41-LABEL: ugt_41_v2i64:
29032 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29033 ; SSE41-NEXT: movdqa %xmm0, %xmm2
29034 ; SSE41-NEXT: pand %xmm1, %xmm2
29035 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29036 ; SSE41-NEXT: movdqa %xmm3, %xmm4
29037 ; SSE41-NEXT: pshufb %xmm2, %xmm4
29038 ; SSE41-NEXT: psrlw $4, %xmm0
29039 ; SSE41-NEXT: pand %xmm1, %xmm0
29040 ; SSE41-NEXT: pshufb %xmm0, %xmm3
29041 ; SSE41-NEXT: paddb %xmm4, %xmm3
29042 ; SSE41-NEXT: pxor %xmm0, %xmm0
29043 ; SSE41-NEXT: psadbw %xmm3, %xmm0
29044 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
29045 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29048 ; AVX1-LABEL: ugt_41_v2i64:
29050 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29051 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
29052 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29053 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29054 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
29055 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
29056 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29057 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29058 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
29059 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29060 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29063 ; AVX2-LABEL: ugt_41_v2i64:
29065 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29066 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
29067 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29068 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29069 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
29070 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
29071 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29072 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29073 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
29074 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29075 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29078 ; AVX512VPOPCNTDQ-LABEL: ugt_41_v2i64:
29079 ; AVX512VPOPCNTDQ: # %bb.0:
29080 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29081 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
29082 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29083 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
29084 ; AVX512VPOPCNTDQ-NEXT: retq
29086 ; AVX512VPOPCNTDQVL-LABEL: ugt_41_v2i64:
29087 ; AVX512VPOPCNTDQVL: # %bb.0:
29088 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
29089 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41]
29090 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
29091 ; AVX512VPOPCNTDQVL-NEXT: retq
29093 ; BITALG_NOVLX-LABEL: ugt_41_v2i64:
29094 ; BITALG_NOVLX: # %bb.0:
29095 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29096 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
29097 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
29098 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29099 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29100 ; BITALG_NOVLX-NEXT: vzeroupper
29101 ; BITALG_NOVLX-NEXT: retq
29103 ; BITALG-LABEL: ugt_41_v2i64:
29105 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
29106 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
29107 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29108 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [41,41]
29109 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
29110 ; BITALG-NEXT: retq
; IR under test: per-lane ctpop, icmp ugt against splat 41, sext of the i1 mask.
29111 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
29112 %3 = icmp ugt <2 x i64> %2, <i64 41, i64 41>
29113 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_42_v2i64 - codegen test for a <2 x i64> population count followed by an
; unsigned "less than 42" compare whose <2 x i1> result is sign-extended back
; to <2 x i64>.
;
; What the autogenerated expectations below pin down, per run configuration:
;  * SSE2 and SSE3 runs - shift/mask/add byte popcount (psrlw, pand, psubb,
;    paddb), psadbw against a zeroed register, pshufd with mask [0,0,2,2], then
;    pcmpgtd with a [42,42,42,42] constant loaded into xmm0.
;  * SSSE3 and SSE41 runs - pshufb lookups into the table
;    [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] for the low and high nibbles, followed by
;    the same psadbw / pshufd / pcmpgtd tail (SSE41 loads the constant with
;    pmovsxbd instead of movdqa).
;  * AVX1 and AVX2 runs - the VEX-encoded pshufb sequence, vpsadbw, and a 64-bit
;    vpcmpgtq against [42,42].
;  * avx512vpopcntdq runs - vpopcntq (on zmm0 without avx512vl, on xmm0 with it)
;    followed by vpcmpgtq against [42,42].
;  * avx512bitalg runs - vpopcntb (on zmm0 without avx512vl, on xmm0 with it),
;    vpsadbw against zero, then vpcmpgtq against [42,42].
;
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
29117 define <2 x i64> @ult_42_v2i64(<2 x i64> %0) {
29118 ; SSE2-LABEL: ult_42_v2i64:
29120 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29121 ; SSE2-NEXT: psrlw $1, %xmm1
29122 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29123 ; SSE2-NEXT: psubb %xmm1, %xmm0
29124 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29125 ; SSE2-NEXT: movdqa %xmm0, %xmm2
29126 ; SSE2-NEXT: pand %xmm1, %xmm2
29127 ; SSE2-NEXT: psrlw $2, %xmm0
29128 ; SSE2-NEXT: pand %xmm1, %xmm0
29129 ; SSE2-NEXT: paddb %xmm2, %xmm0
29130 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29131 ; SSE2-NEXT: psrlw $4, %xmm1
29132 ; SSE2-NEXT: paddb %xmm0, %xmm1
29133 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29134 ; SSE2-NEXT: pxor %xmm0, %xmm0
29135 ; SSE2-NEXT: psadbw %xmm1, %xmm0
29136 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29137 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [42,42,42,42]
29138 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
29141 ; SSE3-LABEL: ult_42_v2i64:
29143 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29144 ; SSE3-NEXT: psrlw $1, %xmm1
29145 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29146 ; SSE3-NEXT: psubb %xmm1, %xmm0
29147 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29148 ; SSE3-NEXT: movdqa %xmm0, %xmm2
29149 ; SSE3-NEXT: pand %xmm1, %xmm2
29150 ; SSE3-NEXT: psrlw $2, %xmm0
29151 ; SSE3-NEXT: pand %xmm1, %xmm0
29152 ; SSE3-NEXT: paddb %xmm2, %xmm0
29153 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29154 ; SSE3-NEXT: psrlw $4, %xmm1
29155 ; SSE3-NEXT: paddb %xmm0, %xmm1
29156 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29157 ; SSE3-NEXT: pxor %xmm0, %xmm0
29158 ; SSE3-NEXT: psadbw %xmm1, %xmm0
29159 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29160 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [42,42,42,42]
29161 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
29164 ; SSSE3-LABEL: ult_42_v2i64:
29166 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29167 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
29168 ; SSSE3-NEXT: pand %xmm1, %xmm2
29169 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29170 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
29171 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
29172 ; SSSE3-NEXT: psrlw $4, %xmm0
29173 ; SSSE3-NEXT: pand %xmm1, %xmm0
29174 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
29175 ; SSSE3-NEXT: paddb %xmm4, %xmm3
29176 ; SSSE3-NEXT: pxor %xmm0, %xmm0
29177 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
29178 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29179 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [42,42,42,42]
29180 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
29183 ; SSE41-LABEL: ult_42_v2i64:
29185 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29186 ; SSE41-NEXT: movdqa %xmm0, %xmm2
29187 ; SSE41-NEXT: pand %xmm1, %xmm2
29188 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29189 ; SSE41-NEXT: movdqa %xmm3, %xmm4
29190 ; SSE41-NEXT: pshufb %xmm2, %xmm4
29191 ; SSE41-NEXT: psrlw $4, %xmm0
29192 ; SSE41-NEXT: pand %xmm1, %xmm0
29193 ; SSE41-NEXT: pshufb %xmm0, %xmm3
29194 ; SSE41-NEXT: paddb %xmm4, %xmm3
29195 ; SSE41-NEXT: pxor %xmm0, %xmm0
29196 ; SSE41-NEXT: psadbw %xmm3, %xmm0
29197 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29198 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [42,42,42,42]
29199 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
29202 ; AVX1-LABEL: ult_42_v2i64:
29204 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29205 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
29206 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29207 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29208 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
29209 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
29210 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29211 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29212 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
29213 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29214 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [42,42]
29215 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29218 ; AVX2-LABEL: ult_42_v2i64:
29220 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29221 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
29222 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29223 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29224 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
29225 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
29226 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29227 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29228 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
29229 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29230 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [42,42]
29231 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29234 ; AVX512VPOPCNTDQ-LABEL: ult_42_v2i64:
29235 ; AVX512VPOPCNTDQ: # %bb.0:
29236 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29237 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
29238 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [42,42]
29239 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29240 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
29241 ; AVX512VPOPCNTDQ-NEXT: retq
29243 ; AVX512VPOPCNTDQVL-LABEL: ult_42_v2i64:
29244 ; AVX512VPOPCNTDQVL: # %bb.0:
29245 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
29246 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42]
29247 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29248 ; AVX512VPOPCNTDQVL-NEXT: retq
29250 ; BITALG_NOVLX-LABEL: ult_42_v2i64:
29251 ; BITALG_NOVLX: # %bb.0:
29252 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29253 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
29254 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
29255 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29256 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [42,42]
29257 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29258 ; BITALG_NOVLX-NEXT: vzeroupper
29259 ; BITALG_NOVLX-NEXT: retq
29261 ; BITALG-LABEL: ult_42_v2i64:
29263 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
29264 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
29265 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29266 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42]
29267 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29268 ; BITALG-NEXT: retq
; IR under test: per-lane ctpop, icmp ult against splat 42, sext of the i1 mask.
29269 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
29270 %3 = icmp ult <2 x i64> %2, <i64 42, i64 42>
29271 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_42_v2i64 - takes the per-lane population count of a <2 x i64> value
; (llvm.ctpop.v2i64), tests each count with an unsigned greater-than compare
; against the splat constant 42, and sign-extends the <2 x i1> mask to
; <2 x i64>.
; The assertions in this function are autogenerated (see the note at the top
; of the file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
29275 define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) {
29276 ; SSE2-LABEL: ugt_42_v2i64:
29278 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29279 ; SSE2-NEXT: psrlw $1, %xmm1
29280 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29281 ; SSE2-NEXT: psubb %xmm1, %xmm0
29282 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29283 ; SSE2-NEXT: movdqa %xmm0, %xmm2
29284 ; SSE2-NEXT: pand %xmm1, %xmm2
29285 ; SSE2-NEXT: psrlw $2, %xmm0
29286 ; SSE2-NEXT: pand %xmm1, %xmm0
29287 ; SSE2-NEXT: paddb %xmm2, %xmm0
29288 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29289 ; SSE2-NEXT: psrlw $4, %xmm1
29290 ; SSE2-NEXT: paddb %xmm0, %xmm1
29291 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29292 ; SSE2-NEXT: pxor %xmm0, %xmm0
29293 ; SSE2-NEXT: psadbw %xmm1, %xmm0
29294 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
29295 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29298 ; SSE3-LABEL: ugt_42_v2i64:
29300 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29301 ; SSE3-NEXT: psrlw $1, %xmm1
29302 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29303 ; SSE3-NEXT: psubb %xmm1, %xmm0
29304 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29305 ; SSE3-NEXT: movdqa %xmm0, %xmm2
29306 ; SSE3-NEXT: pand %xmm1, %xmm2
29307 ; SSE3-NEXT: psrlw $2, %xmm0
29308 ; SSE3-NEXT: pand %xmm1, %xmm0
29309 ; SSE3-NEXT: paddb %xmm2, %xmm0
29310 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29311 ; SSE3-NEXT: psrlw $4, %xmm1
29312 ; SSE3-NEXT: paddb %xmm0, %xmm1
29313 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29314 ; SSE3-NEXT: pxor %xmm0, %xmm0
29315 ; SSE3-NEXT: psadbw %xmm1, %xmm0
29316 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
29317 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29320 ; SSSE3-LABEL: ugt_42_v2i64:
29322 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29323 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
29324 ; SSSE3-NEXT: pand %xmm1, %xmm2
29325 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29326 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
29327 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
29328 ; SSSE3-NEXT: psrlw $4, %xmm0
29329 ; SSSE3-NEXT: pand %xmm1, %xmm0
29330 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
29331 ; SSSE3-NEXT: paddb %xmm4, %xmm3
29332 ; SSSE3-NEXT: pxor %xmm0, %xmm0
29333 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
29334 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
29335 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29338 ; SSE41-LABEL: ugt_42_v2i64:
29340 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29341 ; SSE41-NEXT: movdqa %xmm0, %xmm2
29342 ; SSE41-NEXT: pand %xmm1, %xmm2
29343 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29344 ; SSE41-NEXT: movdqa %xmm3, %xmm4
29345 ; SSE41-NEXT: pshufb %xmm2, %xmm4
29346 ; SSE41-NEXT: psrlw $4, %xmm0
29347 ; SSE41-NEXT: pand %xmm1, %xmm0
29348 ; SSE41-NEXT: pshufb %xmm0, %xmm3
29349 ; SSE41-NEXT: paddb %xmm4, %xmm3
29350 ; SSE41-NEXT: pxor %xmm0, %xmm0
29351 ; SSE41-NEXT: psadbw %xmm3, %xmm0
29352 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
29353 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29356 ; AVX1-LABEL: ugt_42_v2i64:
29358 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29359 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
29360 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29361 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29362 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
29363 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
29364 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29365 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29366 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
29367 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29368 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29371 ; AVX2-LABEL: ugt_42_v2i64:
29373 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29374 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
29375 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29376 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29377 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
29378 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
29379 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29380 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29381 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
29382 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29383 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29386 ; AVX512VPOPCNTDQ-LABEL: ugt_42_v2i64:
29387 ; AVX512VPOPCNTDQ: # %bb.0:
29388 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29389 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
29390 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29391 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
29392 ; AVX512VPOPCNTDQ-NEXT: retq
29394 ; AVX512VPOPCNTDQVL-LABEL: ugt_42_v2i64:
29395 ; AVX512VPOPCNTDQVL: # %bb.0:
29396 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
29397 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42]
29398 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
29399 ; AVX512VPOPCNTDQVL-NEXT: retq
29401 ; BITALG_NOVLX-LABEL: ugt_42_v2i64:
29402 ; BITALG_NOVLX: # %bb.0:
29403 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29404 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
29405 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
29406 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29407 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29408 ; BITALG_NOVLX-NEXT: vzeroupper
29409 ; BITALG_NOVLX-NEXT: retq
29411 ; BITALG-LABEL: ugt_42_v2i64:
29413 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
29414 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
29415 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29416 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [42,42]
29417 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
29418 ; BITALG-NEXT: retq
29419 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
29420 %3 = icmp ugt <2 x i64> %2, <i64 42, i64 42>
29421 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_43_v2i64 - takes the per-lane population count of a <2 x i64> value
; (llvm.ctpop.v2i64), tests each count with an unsigned less-than compare
; against the splat constant 43, and sign-extends the <2 x i1> mask to
; <2 x i64>.
; The assertions in this function are autogenerated (see the note at the top
; of the file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
29425 define <2 x i64> @ult_43_v2i64(<2 x i64> %0) {
29426 ; SSE2-LABEL: ult_43_v2i64:
29428 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29429 ; SSE2-NEXT: psrlw $1, %xmm1
29430 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29431 ; SSE2-NEXT: psubb %xmm1, %xmm0
29432 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29433 ; SSE2-NEXT: movdqa %xmm0, %xmm2
29434 ; SSE2-NEXT: pand %xmm1, %xmm2
29435 ; SSE2-NEXT: psrlw $2, %xmm0
29436 ; SSE2-NEXT: pand %xmm1, %xmm0
29437 ; SSE2-NEXT: paddb %xmm2, %xmm0
29438 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29439 ; SSE2-NEXT: psrlw $4, %xmm1
29440 ; SSE2-NEXT: paddb %xmm0, %xmm1
29441 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29442 ; SSE2-NEXT: pxor %xmm0, %xmm0
29443 ; SSE2-NEXT: psadbw %xmm1, %xmm0
29444 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29445 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [43,43,43,43]
29446 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
29449 ; SSE3-LABEL: ult_43_v2i64:
29451 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29452 ; SSE3-NEXT: psrlw $1, %xmm1
29453 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29454 ; SSE3-NEXT: psubb %xmm1, %xmm0
29455 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29456 ; SSE3-NEXT: movdqa %xmm0, %xmm2
29457 ; SSE3-NEXT: pand %xmm1, %xmm2
29458 ; SSE3-NEXT: psrlw $2, %xmm0
29459 ; SSE3-NEXT: pand %xmm1, %xmm0
29460 ; SSE3-NEXT: paddb %xmm2, %xmm0
29461 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29462 ; SSE3-NEXT: psrlw $4, %xmm1
29463 ; SSE3-NEXT: paddb %xmm0, %xmm1
29464 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29465 ; SSE3-NEXT: pxor %xmm0, %xmm0
29466 ; SSE3-NEXT: psadbw %xmm1, %xmm0
29467 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29468 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [43,43,43,43]
29469 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
29472 ; SSSE3-LABEL: ult_43_v2i64:
29474 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29475 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
29476 ; SSSE3-NEXT: pand %xmm1, %xmm2
29477 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29478 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
29479 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
29480 ; SSSE3-NEXT: psrlw $4, %xmm0
29481 ; SSSE3-NEXT: pand %xmm1, %xmm0
29482 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
29483 ; SSSE3-NEXT: paddb %xmm4, %xmm3
29484 ; SSSE3-NEXT: pxor %xmm0, %xmm0
29485 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
29486 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29487 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [43,43,43,43]
29488 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
29491 ; SSE41-LABEL: ult_43_v2i64:
29493 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29494 ; SSE41-NEXT: movdqa %xmm0, %xmm2
29495 ; SSE41-NEXT: pand %xmm1, %xmm2
29496 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29497 ; SSE41-NEXT: movdqa %xmm3, %xmm4
29498 ; SSE41-NEXT: pshufb %xmm2, %xmm4
29499 ; SSE41-NEXT: psrlw $4, %xmm0
29500 ; SSE41-NEXT: pand %xmm1, %xmm0
29501 ; SSE41-NEXT: pshufb %xmm0, %xmm3
29502 ; SSE41-NEXT: paddb %xmm4, %xmm3
29503 ; SSE41-NEXT: pxor %xmm0, %xmm0
29504 ; SSE41-NEXT: psadbw %xmm3, %xmm0
29505 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29506 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [43,43,43,43]
29507 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
29510 ; AVX1-LABEL: ult_43_v2i64:
29512 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29513 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
29514 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29515 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29516 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
29517 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
29518 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29519 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29520 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
29521 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29522 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [43,43]
29523 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29526 ; AVX2-LABEL: ult_43_v2i64:
29528 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29529 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
29530 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29531 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29532 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
29533 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
29534 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29535 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29536 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
29537 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29538 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [43,43]
29539 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29542 ; AVX512VPOPCNTDQ-LABEL: ult_43_v2i64:
29543 ; AVX512VPOPCNTDQ: # %bb.0:
29544 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29545 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
29546 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [43,43]
29547 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29548 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
29549 ; AVX512VPOPCNTDQ-NEXT: retq
29551 ; AVX512VPOPCNTDQVL-LABEL: ult_43_v2i64:
29552 ; AVX512VPOPCNTDQVL: # %bb.0:
29553 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
29554 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43]
29555 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29556 ; AVX512VPOPCNTDQVL-NEXT: retq
29558 ; BITALG_NOVLX-LABEL: ult_43_v2i64:
29559 ; BITALG_NOVLX: # %bb.0:
29560 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29561 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
29562 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
29563 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29564 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [43,43]
29565 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29566 ; BITALG_NOVLX-NEXT: vzeroupper
29567 ; BITALG_NOVLX-NEXT: retq
29569 ; BITALG-LABEL: ult_43_v2i64:
29571 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
29572 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
29573 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29574 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43]
29575 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29576 ; BITALG-NEXT: retq
29577 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
29578 %3 = icmp ult <2 x i64> %2, <i64 43, i64 43>
29579 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_43_v2i64 - takes the per-lane population count of a <2 x i64> value
; (llvm.ctpop.v2i64), tests each count with an unsigned greater-than compare
; against the splat constant 43, and sign-extends the <2 x i1> mask to
; <2 x i64>.
; The assertions in this function are autogenerated (see the note at the top
; of the file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
29583 define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) {
29584 ; SSE2-LABEL: ugt_43_v2i64:
29586 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29587 ; SSE2-NEXT: psrlw $1, %xmm1
29588 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29589 ; SSE2-NEXT: psubb %xmm1, %xmm0
29590 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29591 ; SSE2-NEXT: movdqa %xmm0, %xmm2
29592 ; SSE2-NEXT: pand %xmm1, %xmm2
29593 ; SSE2-NEXT: psrlw $2, %xmm0
29594 ; SSE2-NEXT: pand %xmm1, %xmm0
29595 ; SSE2-NEXT: paddb %xmm2, %xmm0
29596 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29597 ; SSE2-NEXT: psrlw $4, %xmm1
29598 ; SSE2-NEXT: paddb %xmm0, %xmm1
29599 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29600 ; SSE2-NEXT: pxor %xmm0, %xmm0
29601 ; SSE2-NEXT: psadbw %xmm1, %xmm0
29602 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
29603 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29606 ; SSE3-LABEL: ugt_43_v2i64:
29608 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29609 ; SSE3-NEXT: psrlw $1, %xmm1
29610 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29611 ; SSE3-NEXT: psubb %xmm1, %xmm0
29612 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29613 ; SSE3-NEXT: movdqa %xmm0, %xmm2
29614 ; SSE3-NEXT: pand %xmm1, %xmm2
29615 ; SSE3-NEXT: psrlw $2, %xmm0
29616 ; SSE3-NEXT: pand %xmm1, %xmm0
29617 ; SSE3-NEXT: paddb %xmm2, %xmm0
29618 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29619 ; SSE3-NEXT: psrlw $4, %xmm1
29620 ; SSE3-NEXT: paddb %xmm0, %xmm1
29621 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29622 ; SSE3-NEXT: pxor %xmm0, %xmm0
29623 ; SSE3-NEXT: psadbw %xmm1, %xmm0
29624 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
29625 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29628 ; SSSE3-LABEL: ugt_43_v2i64:
29630 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29631 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
29632 ; SSSE3-NEXT: pand %xmm1, %xmm2
29633 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29634 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
29635 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
29636 ; SSSE3-NEXT: psrlw $4, %xmm0
29637 ; SSSE3-NEXT: pand %xmm1, %xmm0
29638 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
29639 ; SSSE3-NEXT: paddb %xmm4, %xmm3
29640 ; SSSE3-NEXT: pxor %xmm0, %xmm0
29641 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
29642 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
29643 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29646 ; SSE41-LABEL: ugt_43_v2i64:
29648 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29649 ; SSE41-NEXT: movdqa %xmm0, %xmm2
29650 ; SSE41-NEXT: pand %xmm1, %xmm2
29651 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29652 ; SSE41-NEXT: movdqa %xmm3, %xmm4
29653 ; SSE41-NEXT: pshufb %xmm2, %xmm4
29654 ; SSE41-NEXT: psrlw $4, %xmm0
29655 ; SSE41-NEXT: pand %xmm1, %xmm0
29656 ; SSE41-NEXT: pshufb %xmm0, %xmm3
29657 ; SSE41-NEXT: paddb %xmm4, %xmm3
29658 ; SSE41-NEXT: pxor %xmm0, %xmm0
29659 ; SSE41-NEXT: psadbw %xmm3, %xmm0
29660 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
29661 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29664 ; AVX1-LABEL: ugt_43_v2i64:
29666 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29667 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
29668 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29669 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29670 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
29671 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
29672 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29673 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29674 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
29675 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29676 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29679 ; AVX2-LABEL: ugt_43_v2i64:
29681 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29682 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
29683 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29684 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29685 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
29686 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
29687 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29688 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29689 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
29690 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29691 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29694 ; AVX512VPOPCNTDQ-LABEL: ugt_43_v2i64:
29695 ; AVX512VPOPCNTDQ: # %bb.0:
29696 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29697 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
29698 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29699 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
29700 ; AVX512VPOPCNTDQ-NEXT: retq
29702 ; AVX512VPOPCNTDQVL-LABEL: ugt_43_v2i64:
29703 ; AVX512VPOPCNTDQVL: # %bb.0:
29704 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
29705 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43]
29706 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
29707 ; AVX512VPOPCNTDQVL-NEXT: retq
29709 ; BITALG_NOVLX-LABEL: ugt_43_v2i64:
29710 ; BITALG_NOVLX: # %bb.0:
29711 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29712 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
29713 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
29714 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29715 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29716 ; BITALG_NOVLX-NEXT: vzeroupper
29717 ; BITALG_NOVLX-NEXT: retq
29719 ; BITALG-LABEL: ugt_43_v2i64:
29721 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
29722 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
29723 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29724 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [43,43]
29725 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
29726 ; BITALG-NEXT: retq
29727 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
29728 %3 = icmp ugt <2 x i64> %2, <i64 43, i64 43>
29729 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_44_v2i64 - takes the per-lane population count of a <2 x i64> value
; (llvm.ctpop.v2i64), tests each count with an unsigned less-than compare
; against the splat constant 44, and sign-extends the <2 x i1> mask to
; <2 x i64>.
; The assertions in this function are autogenerated (see the note at the top
; of the file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
29733 define <2 x i64> @ult_44_v2i64(<2 x i64> %0) {
29734 ; SSE2-LABEL: ult_44_v2i64:
29736 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29737 ; SSE2-NEXT: psrlw $1, %xmm1
29738 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29739 ; SSE2-NEXT: psubb %xmm1, %xmm0
29740 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29741 ; SSE2-NEXT: movdqa %xmm0, %xmm2
29742 ; SSE2-NEXT: pand %xmm1, %xmm2
29743 ; SSE2-NEXT: psrlw $2, %xmm0
29744 ; SSE2-NEXT: pand %xmm1, %xmm0
29745 ; SSE2-NEXT: paddb %xmm2, %xmm0
29746 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29747 ; SSE2-NEXT: psrlw $4, %xmm1
29748 ; SSE2-NEXT: paddb %xmm0, %xmm1
29749 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29750 ; SSE2-NEXT: pxor %xmm0, %xmm0
29751 ; SSE2-NEXT: psadbw %xmm1, %xmm0
29752 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29753 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [44,44,44,44]
29754 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
29757 ; SSE3-LABEL: ult_44_v2i64:
29759 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29760 ; SSE3-NEXT: psrlw $1, %xmm1
29761 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29762 ; SSE3-NEXT: psubb %xmm1, %xmm0
29763 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29764 ; SSE3-NEXT: movdqa %xmm0, %xmm2
29765 ; SSE3-NEXT: pand %xmm1, %xmm2
29766 ; SSE3-NEXT: psrlw $2, %xmm0
29767 ; SSE3-NEXT: pand %xmm1, %xmm0
29768 ; SSE3-NEXT: paddb %xmm2, %xmm0
29769 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29770 ; SSE3-NEXT: psrlw $4, %xmm1
29771 ; SSE3-NEXT: paddb %xmm0, %xmm1
29772 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29773 ; SSE3-NEXT: pxor %xmm0, %xmm0
29774 ; SSE3-NEXT: psadbw %xmm1, %xmm0
29775 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29776 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [44,44,44,44]
29777 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
29780 ; SSSE3-LABEL: ult_44_v2i64:
29782 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29783 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
29784 ; SSSE3-NEXT: pand %xmm1, %xmm2
29785 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29786 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
29787 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
29788 ; SSSE3-NEXT: psrlw $4, %xmm0
29789 ; SSSE3-NEXT: pand %xmm1, %xmm0
29790 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
29791 ; SSSE3-NEXT: paddb %xmm4, %xmm3
29792 ; SSSE3-NEXT: pxor %xmm0, %xmm0
29793 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
29794 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29795 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [44,44,44,44]
29796 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
29799 ; SSE41-LABEL: ult_44_v2i64:
29801 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29802 ; SSE41-NEXT: movdqa %xmm0, %xmm2
29803 ; SSE41-NEXT: pand %xmm1, %xmm2
29804 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29805 ; SSE41-NEXT: movdqa %xmm3, %xmm4
29806 ; SSE41-NEXT: pshufb %xmm2, %xmm4
29807 ; SSE41-NEXT: psrlw $4, %xmm0
29808 ; SSE41-NEXT: pand %xmm1, %xmm0
29809 ; SSE41-NEXT: pshufb %xmm0, %xmm3
29810 ; SSE41-NEXT: paddb %xmm4, %xmm3
29811 ; SSE41-NEXT: pxor %xmm0, %xmm0
29812 ; SSE41-NEXT: psadbw %xmm3, %xmm0
29813 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
29814 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [44,44,44,44]
29815 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
29818 ; AVX1-LABEL: ult_44_v2i64:
29820 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29821 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
29822 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29823 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29824 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
29825 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
29826 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29827 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29828 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
29829 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29830 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [44,44]
29831 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29834 ; AVX2-LABEL: ult_44_v2i64:
29836 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29837 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
29838 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29839 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29840 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
29841 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
29842 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29843 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29844 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
29845 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29846 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [44,44]
29847 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29850 ; AVX512VPOPCNTDQ-LABEL: ult_44_v2i64:
29851 ; AVX512VPOPCNTDQ: # %bb.0:
29852 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29853 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
29854 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [44,44]
29855 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29856 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
29857 ; AVX512VPOPCNTDQ-NEXT: retq
29859 ; AVX512VPOPCNTDQVL-LABEL: ult_44_v2i64:
29860 ; AVX512VPOPCNTDQVL: # %bb.0:
29861 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
29862 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44]
29863 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29864 ; AVX512VPOPCNTDQVL-NEXT: retq
29866 ; BITALG_NOVLX-LABEL: ult_44_v2i64:
29867 ; BITALG_NOVLX: # %bb.0:
29868 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
29869 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
29870 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
29871 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29872 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [44,44]
29873 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29874 ; BITALG_NOVLX-NEXT: vzeroupper
29875 ; BITALG_NOVLX-NEXT: retq
29877 ; BITALG-LABEL: ult_44_v2i64:
29879 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
29880 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
29881 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29882 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44]
29883 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
29884 ; BITALG-NEXT: retq
29885 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
29886 %3 = icmp ult <2 x i64> %2, <i64 44, i64 44>
29887 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_44_v2i64 - takes the per-lane population count of a <2 x i64> value
; (llvm.ctpop.v2i64), tests each count with an unsigned greater-than compare
; against the splat constant 44, and sign-extends the <2 x i1> mask to
; <2 x i64>.
; The assertions in this function are autogenerated (see the note at the top
; of the file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
29891 define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) {
29892 ; SSE2-LABEL: ugt_44_v2i64:
29894 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29895 ; SSE2-NEXT: psrlw $1, %xmm1
29896 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29897 ; SSE2-NEXT: psubb %xmm1, %xmm0
29898 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29899 ; SSE2-NEXT: movdqa %xmm0, %xmm2
29900 ; SSE2-NEXT: pand %xmm1, %xmm2
29901 ; SSE2-NEXT: psrlw $2, %xmm0
29902 ; SSE2-NEXT: pand %xmm1, %xmm0
29903 ; SSE2-NEXT: paddb %xmm2, %xmm0
29904 ; SSE2-NEXT: movdqa %xmm0, %xmm1
29905 ; SSE2-NEXT: psrlw $4, %xmm1
29906 ; SSE2-NEXT: paddb %xmm0, %xmm1
29907 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29908 ; SSE2-NEXT: pxor %xmm0, %xmm0
29909 ; SSE2-NEXT: psadbw %xmm1, %xmm0
29910 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
29911 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29914 ; SSE3-LABEL: ugt_44_v2i64:
29916 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29917 ; SSE3-NEXT: psrlw $1, %xmm1
29918 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29919 ; SSE3-NEXT: psubb %xmm1, %xmm0
29920 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
29921 ; SSE3-NEXT: movdqa %xmm0, %xmm2
29922 ; SSE3-NEXT: pand %xmm1, %xmm2
29923 ; SSE3-NEXT: psrlw $2, %xmm0
29924 ; SSE3-NEXT: pand %xmm1, %xmm0
29925 ; SSE3-NEXT: paddb %xmm2, %xmm0
29926 ; SSE3-NEXT: movdqa %xmm0, %xmm1
29927 ; SSE3-NEXT: psrlw $4, %xmm1
29928 ; SSE3-NEXT: paddb %xmm0, %xmm1
29929 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
29930 ; SSE3-NEXT: pxor %xmm0, %xmm0
29931 ; SSE3-NEXT: psadbw %xmm1, %xmm0
29932 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
29933 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29936 ; SSSE3-LABEL: ugt_44_v2i64:
29938 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29939 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
29940 ; SSSE3-NEXT: pand %xmm1, %xmm2
29941 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29942 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
29943 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
29944 ; SSSE3-NEXT: psrlw $4, %xmm0
29945 ; SSSE3-NEXT: pand %xmm1, %xmm0
29946 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
29947 ; SSSE3-NEXT: paddb %xmm4, %xmm3
29948 ; SSSE3-NEXT: pxor %xmm0, %xmm0
29949 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
29950 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
29951 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29954 ; SSE41-LABEL: ugt_44_v2i64:
29956 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29957 ; SSE41-NEXT: movdqa %xmm0, %xmm2
29958 ; SSE41-NEXT: pand %xmm1, %xmm2
29959 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29960 ; SSE41-NEXT: movdqa %xmm3, %xmm4
29961 ; SSE41-NEXT: pshufb %xmm2, %xmm4
29962 ; SSE41-NEXT: psrlw $4, %xmm0
29963 ; SSE41-NEXT: pand %xmm1, %xmm0
29964 ; SSE41-NEXT: pshufb %xmm0, %xmm3
29965 ; SSE41-NEXT: paddb %xmm4, %xmm3
29966 ; SSE41-NEXT: pxor %xmm0, %xmm0
29967 ; SSE41-NEXT: psadbw %xmm3, %xmm0
29968 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
29969 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29972 ; AVX1-LABEL: ugt_44_v2i64:
29974 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29975 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
29976 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29977 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29978 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
29979 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
29980 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29981 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29982 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
29983 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29984 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
29987 ; AVX2-LABEL: ugt_44_v2i64:
29989 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
29990 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
29991 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
29992 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
29993 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
29994 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
29995 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
29996 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
29997 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
29998 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
29999 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30002 ; AVX512VPOPCNTDQ-LABEL: ugt_44_v2i64:
30003 ; AVX512VPOPCNTDQ: # %bb.0:
30004 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30005 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
30006 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30007 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
30008 ; AVX512VPOPCNTDQ-NEXT: retq
30010 ; AVX512VPOPCNTDQVL-LABEL: ugt_44_v2i64:
30011 ; AVX512VPOPCNTDQVL: # %bb.0:
30012 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
30013 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44]
30014 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
30015 ; AVX512VPOPCNTDQVL-NEXT: retq
30017 ; BITALG_NOVLX-LABEL: ugt_44_v2i64:
30018 ; BITALG_NOVLX: # %bb.0:
30019 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30020 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
30021 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
30022 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30023 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30024 ; BITALG_NOVLX-NEXT: vzeroupper
30025 ; BITALG_NOVLX-NEXT: retq
30027 ; BITALG-LABEL: ugt_44_v2i64:
30029 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
30030 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
30031 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30032 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [44,44]
30033 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
30034 ; BITALG-NEXT: retq
30035 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
30036 %3 = icmp ugt <2 x i64> %2, <i64 44, i64 44>
30037 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_45_v2i64 - per-lane population count of a <2 x i64> vector,
; compared unsigned-less-than against a splat of 45; the <2 x i1> compare
; result is sign-extended back to <2 x i64>.
; The assertion lines in this function are autogenerated (see the note on the
; first line of this file), one group per FileCheck prefix used by the RUN lines.
; In every group the expected final compare is a pcmpgtd/vpcmpgtq with the
; splat constant in a register as the greater-than side.
30041 define <2 x i64> @ult_45_v2i64(<2 x i64> %0) {
30042 ; SSE2-LABEL: ult_45_v2i64:
30044 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30045 ; SSE2-NEXT: psrlw $1, %xmm1
30046 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30047 ; SSE2-NEXT: psubb %xmm1, %xmm0
30048 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30049 ; SSE2-NEXT: movdqa %xmm0, %xmm2
30050 ; SSE2-NEXT: pand %xmm1, %xmm2
30051 ; SSE2-NEXT: psrlw $2, %xmm0
30052 ; SSE2-NEXT: pand %xmm1, %xmm0
30053 ; SSE2-NEXT: paddb %xmm2, %xmm0
30054 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30055 ; SSE2-NEXT: psrlw $4, %xmm1
30056 ; SSE2-NEXT: paddb %xmm0, %xmm1
30057 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30058 ; SSE2-NEXT: pxor %xmm0, %xmm0
30059 ; SSE2-NEXT: psadbw %xmm1, %xmm0
30060 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30061 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [45,45,45,45]
30062 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
30065 ; SSE3-LABEL: ult_45_v2i64:
30067 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30068 ; SSE3-NEXT: psrlw $1, %xmm1
30069 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30070 ; SSE3-NEXT: psubb %xmm1, %xmm0
30071 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30072 ; SSE3-NEXT: movdqa %xmm0, %xmm2
30073 ; SSE3-NEXT: pand %xmm1, %xmm2
30074 ; SSE3-NEXT: psrlw $2, %xmm0
30075 ; SSE3-NEXT: pand %xmm1, %xmm0
30076 ; SSE3-NEXT: paddb %xmm2, %xmm0
30077 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30078 ; SSE3-NEXT: psrlw $4, %xmm1
30079 ; SSE3-NEXT: paddb %xmm0, %xmm1
30080 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30081 ; SSE3-NEXT: pxor %xmm0, %xmm0
30082 ; SSE3-NEXT: psadbw %xmm1, %xmm0
30083 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30084 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [45,45,45,45]
30085 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
30088 ; SSSE3-LABEL: ult_45_v2i64:
30090 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30091 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
30092 ; SSSE3-NEXT: pand %xmm1, %xmm2
30093 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30094 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
30095 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
30096 ; SSSE3-NEXT: psrlw $4, %xmm0
30097 ; SSSE3-NEXT: pand %xmm1, %xmm0
30098 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
30099 ; SSSE3-NEXT: paddb %xmm4, %xmm3
30100 ; SSSE3-NEXT: pxor %xmm0, %xmm0
30101 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
30102 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30103 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [45,45,45,45]
30104 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
30107 ; SSE41-LABEL: ult_45_v2i64:
30109 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30110 ; SSE41-NEXT: movdqa %xmm0, %xmm2
30111 ; SSE41-NEXT: pand %xmm1, %xmm2
30112 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30113 ; SSE41-NEXT: movdqa %xmm3, %xmm4
30114 ; SSE41-NEXT: pshufb %xmm2, %xmm4
30115 ; SSE41-NEXT: psrlw $4, %xmm0
30116 ; SSE41-NEXT: pand %xmm1, %xmm0
30117 ; SSE41-NEXT: pshufb %xmm0, %xmm3
30118 ; SSE41-NEXT: paddb %xmm4, %xmm3
30119 ; SSE41-NEXT: pxor %xmm0, %xmm0
30120 ; SSE41-NEXT: psadbw %xmm3, %xmm0
30121 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30122 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [45,45,45,45]
30123 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
30126 ; AVX1-LABEL: ult_45_v2i64:
30128 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30129 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
30130 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30131 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30132 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
30133 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
30134 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30135 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30136 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
30137 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30138 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [45,45]
30139 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30142 ; AVX2-LABEL: ult_45_v2i64:
30144 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30145 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
30146 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30147 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30148 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
30149 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
30150 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30151 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30152 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
30153 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30154 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [45,45]
30155 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30158 ; AVX512VPOPCNTDQ-LABEL: ult_45_v2i64:
30159 ; AVX512VPOPCNTDQ: # %bb.0:
30160 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30161 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
30162 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [45,45]
30163 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30164 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
30165 ; AVX512VPOPCNTDQ-NEXT: retq
30167 ; AVX512VPOPCNTDQVL-LABEL: ult_45_v2i64:
30168 ; AVX512VPOPCNTDQVL: # %bb.0:
30169 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
30170 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45]
30171 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30172 ; AVX512VPOPCNTDQVL-NEXT: retq
30174 ; BITALG_NOVLX-LABEL: ult_45_v2i64:
30175 ; BITALG_NOVLX: # %bb.0:
30176 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30177 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
30178 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
30179 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30180 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [45,45]
30181 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30182 ; BITALG_NOVLX-NEXT: vzeroupper
30183 ; BITALG_NOVLX-NEXT: retq
30185 ; BITALG-LABEL: ult_45_v2i64:
30187 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
30188 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
30189 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30190 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45]
30191 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30192 ; BITALG-NEXT: retq
; IR under test - %2 is the llvm.ctpop.v2i64 result, %3 the per-lane
; "icmp ult" against 45, and %4 the sign-extended compare result.
30193 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
30194 %3 = icmp ult <2 x i64> %2, <i64 45, i64 45>
30195 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_45_v2i64 - per-lane population count of a <2 x i64> vector,
; compared unsigned-greater-than against a splat of 45; the <2 x i1> compare
; result is sign-extended back to <2 x i64>.
; The assertion lines in this function are autogenerated (see the note on the
; first line of this file), one group per FileCheck prefix used by the RUN lines.
; In every group the expected final compare is a pcmpgtd/vpcmpgtq with the
; popcount register as the greater-than side; the other operand is either a
; constant-pool reference or a broadcast [45,45] register.
30199 define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) {
30200 ; SSE2-LABEL: ugt_45_v2i64:
30202 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30203 ; SSE2-NEXT: psrlw $1, %xmm1
30204 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30205 ; SSE2-NEXT: psubb %xmm1, %xmm0
30206 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30207 ; SSE2-NEXT: movdqa %xmm0, %xmm2
30208 ; SSE2-NEXT: pand %xmm1, %xmm2
30209 ; SSE2-NEXT: psrlw $2, %xmm0
30210 ; SSE2-NEXT: pand %xmm1, %xmm0
30211 ; SSE2-NEXT: paddb %xmm2, %xmm0
30212 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30213 ; SSE2-NEXT: psrlw $4, %xmm1
30214 ; SSE2-NEXT: paddb %xmm0, %xmm1
30215 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30216 ; SSE2-NEXT: pxor %xmm0, %xmm0
30217 ; SSE2-NEXT: psadbw %xmm1, %xmm0
30218 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
30219 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
30222 ; SSE3-LABEL: ugt_45_v2i64:
30224 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30225 ; SSE3-NEXT: psrlw $1, %xmm1
30226 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30227 ; SSE3-NEXT: psubb %xmm1, %xmm0
30228 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30229 ; SSE3-NEXT: movdqa %xmm0, %xmm2
30230 ; SSE3-NEXT: pand %xmm1, %xmm2
30231 ; SSE3-NEXT: psrlw $2, %xmm0
30232 ; SSE3-NEXT: pand %xmm1, %xmm0
30233 ; SSE3-NEXT: paddb %xmm2, %xmm0
30234 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30235 ; SSE3-NEXT: psrlw $4, %xmm1
30236 ; SSE3-NEXT: paddb %xmm0, %xmm1
30237 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30238 ; SSE3-NEXT: pxor %xmm0, %xmm0
30239 ; SSE3-NEXT: psadbw %xmm1, %xmm0
30240 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
30241 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
30244 ; SSSE3-LABEL: ugt_45_v2i64:
30246 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30247 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
30248 ; SSSE3-NEXT: pand %xmm1, %xmm2
30249 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30250 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
30251 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
30252 ; SSSE3-NEXT: psrlw $4, %xmm0
30253 ; SSSE3-NEXT: pand %xmm1, %xmm0
30254 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
30255 ; SSSE3-NEXT: paddb %xmm4, %xmm3
30256 ; SSSE3-NEXT: pxor %xmm0, %xmm0
30257 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
30258 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
30259 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
30262 ; SSE41-LABEL: ugt_45_v2i64:
30264 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30265 ; SSE41-NEXT: movdqa %xmm0, %xmm2
30266 ; SSE41-NEXT: pand %xmm1, %xmm2
30267 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30268 ; SSE41-NEXT: movdqa %xmm3, %xmm4
30269 ; SSE41-NEXT: pshufb %xmm2, %xmm4
30270 ; SSE41-NEXT: psrlw $4, %xmm0
30271 ; SSE41-NEXT: pand %xmm1, %xmm0
30272 ; SSE41-NEXT: pshufb %xmm0, %xmm3
30273 ; SSE41-NEXT: paddb %xmm4, %xmm3
30274 ; SSE41-NEXT: pxor %xmm0, %xmm0
30275 ; SSE41-NEXT: psadbw %xmm3, %xmm0
30276 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
30277 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
30280 ; AVX1-LABEL: ugt_45_v2i64:
30282 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30283 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
30284 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30285 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30286 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
30287 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
30288 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30289 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30290 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
30291 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30292 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30295 ; AVX2-LABEL: ugt_45_v2i64:
30297 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30298 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
30299 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30300 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30301 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
30302 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
30303 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30304 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30305 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
30306 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30307 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30310 ; AVX512VPOPCNTDQ-LABEL: ugt_45_v2i64:
30311 ; AVX512VPOPCNTDQ: # %bb.0:
30312 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30313 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
30314 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30315 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
30316 ; AVX512VPOPCNTDQ-NEXT: retq
30318 ; AVX512VPOPCNTDQVL-LABEL: ugt_45_v2i64:
30319 ; AVX512VPOPCNTDQVL: # %bb.0:
30320 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
30321 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45]
30322 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
30323 ; AVX512VPOPCNTDQVL-NEXT: retq
30325 ; BITALG_NOVLX-LABEL: ugt_45_v2i64:
30326 ; BITALG_NOVLX: # %bb.0:
30327 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30328 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
30329 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
30330 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30331 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30332 ; BITALG_NOVLX-NEXT: vzeroupper
30333 ; BITALG_NOVLX-NEXT: retq
30335 ; BITALG-LABEL: ugt_45_v2i64:
30337 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
30338 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
30339 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30340 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [45,45]
30341 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
30342 ; BITALG-NEXT: retq
; IR under test - %2 is the llvm.ctpop.v2i64 result, %3 the per-lane
; "icmp ugt" against 45, and %4 the sign-extended compare result.
30343 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
30344 %3 = icmp ugt <2 x i64> %2, <i64 45, i64 45>
30345 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_46_v2i64 - per-lane population count of a <2 x i64> vector,
; compared unsigned-less-than against a splat of 46; the <2 x i1> compare
; result is sign-extended back to <2 x i64>.
; The assertion lines in this function are autogenerated (see the note on the
; first line of this file), one group per FileCheck prefix used by the RUN lines.
; In every group the expected final compare is a pcmpgtd/vpcmpgtq with the
; splat constant in a register as the greater-than side.
30349 define <2 x i64> @ult_46_v2i64(<2 x i64> %0) {
30350 ; SSE2-LABEL: ult_46_v2i64:
30352 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30353 ; SSE2-NEXT: psrlw $1, %xmm1
30354 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30355 ; SSE2-NEXT: psubb %xmm1, %xmm0
30356 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30357 ; SSE2-NEXT: movdqa %xmm0, %xmm2
30358 ; SSE2-NEXT: pand %xmm1, %xmm2
30359 ; SSE2-NEXT: psrlw $2, %xmm0
30360 ; SSE2-NEXT: pand %xmm1, %xmm0
30361 ; SSE2-NEXT: paddb %xmm2, %xmm0
30362 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30363 ; SSE2-NEXT: psrlw $4, %xmm1
30364 ; SSE2-NEXT: paddb %xmm0, %xmm1
30365 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30366 ; SSE2-NEXT: pxor %xmm0, %xmm0
30367 ; SSE2-NEXT: psadbw %xmm1, %xmm0
30368 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30369 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [46,46,46,46]
30370 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
30373 ; SSE3-LABEL: ult_46_v2i64:
30375 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30376 ; SSE3-NEXT: psrlw $1, %xmm1
30377 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30378 ; SSE3-NEXT: psubb %xmm1, %xmm0
30379 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30380 ; SSE3-NEXT: movdqa %xmm0, %xmm2
30381 ; SSE3-NEXT: pand %xmm1, %xmm2
30382 ; SSE3-NEXT: psrlw $2, %xmm0
30383 ; SSE3-NEXT: pand %xmm1, %xmm0
30384 ; SSE3-NEXT: paddb %xmm2, %xmm0
30385 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30386 ; SSE3-NEXT: psrlw $4, %xmm1
30387 ; SSE3-NEXT: paddb %xmm0, %xmm1
30388 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30389 ; SSE3-NEXT: pxor %xmm0, %xmm0
30390 ; SSE3-NEXT: psadbw %xmm1, %xmm0
30391 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30392 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [46,46,46,46]
30393 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
30396 ; SSSE3-LABEL: ult_46_v2i64:
30398 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30399 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
30400 ; SSSE3-NEXT: pand %xmm1, %xmm2
30401 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30402 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
30403 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
30404 ; SSSE3-NEXT: psrlw $4, %xmm0
30405 ; SSSE3-NEXT: pand %xmm1, %xmm0
30406 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
30407 ; SSSE3-NEXT: paddb %xmm4, %xmm3
30408 ; SSSE3-NEXT: pxor %xmm0, %xmm0
30409 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
30410 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30411 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [46,46,46,46]
30412 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
30415 ; SSE41-LABEL: ult_46_v2i64:
30417 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30418 ; SSE41-NEXT: movdqa %xmm0, %xmm2
30419 ; SSE41-NEXT: pand %xmm1, %xmm2
30420 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30421 ; SSE41-NEXT: movdqa %xmm3, %xmm4
30422 ; SSE41-NEXT: pshufb %xmm2, %xmm4
30423 ; SSE41-NEXT: psrlw $4, %xmm0
30424 ; SSE41-NEXT: pand %xmm1, %xmm0
30425 ; SSE41-NEXT: pshufb %xmm0, %xmm3
30426 ; SSE41-NEXT: paddb %xmm4, %xmm3
30427 ; SSE41-NEXT: pxor %xmm0, %xmm0
30428 ; SSE41-NEXT: psadbw %xmm3, %xmm0
30429 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30430 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [46,46,46,46]
30431 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
30434 ; AVX1-LABEL: ult_46_v2i64:
30436 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30437 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
30438 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30439 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30440 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
30441 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
30442 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30443 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30444 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
30445 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30446 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [46,46]
30447 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30450 ; AVX2-LABEL: ult_46_v2i64:
30452 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30453 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
30454 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30455 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30456 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
30457 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
30458 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30459 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30460 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
30461 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30462 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [46,46]
30463 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30466 ; AVX512VPOPCNTDQ-LABEL: ult_46_v2i64:
30467 ; AVX512VPOPCNTDQ: # %bb.0:
30468 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30469 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
30470 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [46,46]
30471 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30472 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
30473 ; AVX512VPOPCNTDQ-NEXT: retq
30475 ; AVX512VPOPCNTDQVL-LABEL: ult_46_v2i64:
30476 ; AVX512VPOPCNTDQVL: # %bb.0:
30477 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
30478 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46]
30479 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30480 ; AVX512VPOPCNTDQVL-NEXT: retq
30482 ; BITALG_NOVLX-LABEL: ult_46_v2i64:
30483 ; BITALG_NOVLX: # %bb.0:
30484 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30485 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
30486 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
30487 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30488 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [46,46]
30489 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30490 ; BITALG_NOVLX-NEXT: vzeroupper
30491 ; BITALG_NOVLX-NEXT: retq
30493 ; BITALG-LABEL: ult_46_v2i64:
30495 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
30496 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
30497 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30498 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46]
30499 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30500 ; BITALG-NEXT: retq
; IR under test - %2 is the llvm.ctpop.v2i64 result, %3 the per-lane
; "icmp ult" against 46, and %4 the sign-extended compare result.
30501 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
30502 %3 = icmp ult <2 x i64> %2, <i64 46, i64 46>
30503 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_46_v2i64 - per-lane population count of a <2 x i64> vector,
; compared unsigned-greater-than against a splat of 46; the <2 x i1> compare
; result is sign-extended back to <2 x i64>.
; The assertion lines in this function are autogenerated (see the note on the
; first line of this file), one group per FileCheck prefix used by the RUN lines.
; In every group the expected final compare is a pcmpgtd/vpcmpgtq with the
; popcount register as the greater-than side; the other operand is either a
; constant-pool reference or a broadcast [46,46] register.
30507 define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) {
30508 ; SSE2-LABEL: ugt_46_v2i64:
30510 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30511 ; SSE2-NEXT: psrlw $1, %xmm1
30512 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30513 ; SSE2-NEXT: psubb %xmm1, %xmm0
30514 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30515 ; SSE2-NEXT: movdqa %xmm0, %xmm2
30516 ; SSE2-NEXT: pand %xmm1, %xmm2
30517 ; SSE2-NEXT: psrlw $2, %xmm0
30518 ; SSE2-NEXT: pand %xmm1, %xmm0
30519 ; SSE2-NEXT: paddb %xmm2, %xmm0
30520 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30521 ; SSE2-NEXT: psrlw $4, %xmm1
30522 ; SSE2-NEXT: paddb %xmm0, %xmm1
30523 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30524 ; SSE2-NEXT: pxor %xmm0, %xmm0
30525 ; SSE2-NEXT: psadbw %xmm1, %xmm0
30526 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
30527 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
30530 ; SSE3-LABEL: ugt_46_v2i64:
30532 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30533 ; SSE3-NEXT: psrlw $1, %xmm1
30534 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30535 ; SSE3-NEXT: psubb %xmm1, %xmm0
30536 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30537 ; SSE3-NEXT: movdqa %xmm0, %xmm2
30538 ; SSE3-NEXT: pand %xmm1, %xmm2
30539 ; SSE3-NEXT: psrlw $2, %xmm0
30540 ; SSE3-NEXT: pand %xmm1, %xmm0
30541 ; SSE3-NEXT: paddb %xmm2, %xmm0
30542 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30543 ; SSE3-NEXT: psrlw $4, %xmm1
30544 ; SSE3-NEXT: paddb %xmm0, %xmm1
30545 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30546 ; SSE3-NEXT: pxor %xmm0, %xmm0
30547 ; SSE3-NEXT: psadbw %xmm1, %xmm0
30548 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
30549 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
30552 ; SSSE3-LABEL: ugt_46_v2i64:
30554 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30555 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
30556 ; SSSE3-NEXT: pand %xmm1, %xmm2
30557 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30558 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
30559 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
30560 ; SSSE3-NEXT: psrlw $4, %xmm0
30561 ; SSSE3-NEXT: pand %xmm1, %xmm0
30562 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
30563 ; SSSE3-NEXT: paddb %xmm4, %xmm3
30564 ; SSSE3-NEXT: pxor %xmm0, %xmm0
30565 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
30566 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
30567 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
30570 ; SSE41-LABEL: ugt_46_v2i64:
30572 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30573 ; SSE41-NEXT: movdqa %xmm0, %xmm2
30574 ; SSE41-NEXT: pand %xmm1, %xmm2
30575 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30576 ; SSE41-NEXT: movdqa %xmm3, %xmm4
30577 ; SSE41-NEXT: pshufb %xmm2, %xmm4
30578 ; SSE41-NEXT: psrlw $4, %xmm0
30579 ; SSE41-NEXT: pand %xmm1, %xmm0
30580 ; SSE41-NEXT: pshufb %xmm0, %xmm3
30581 ; SSE41-NEXT: paddb %xmm4, %xmm3
30582 ; SSE41-NEXT: pxor %xmm0, %xmm0
30583 ; SSE41-NEXT: psadbw %xmm3, %xmm0
30584 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
30585 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
30588 ; AVX1-LABEL: ugt_46_v2i64:
30590 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30591 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
30592 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30593 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30594 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
30595 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
30596 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30597 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30598 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
30599 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30600 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30603 ; AVX2-LABEL: ugt_46_v2i64:
30605 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30606 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
30607 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30608 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30609 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
30610 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
30611 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30612 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30613 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
30614 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30615 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30618 ; AVX512VPOPCNTDQ-LABEL: ugt_46_v2i64:
30619 ; AVX512VPOPCNTDQ: # %bb.0:
30620 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30621 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
30622 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30623 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
30624 ; AVX512VPOPCNTDQ-NEXT: retq
30626 ; AVX512VPOPCNTDQVL-LABEL: ugt_46_v2i64:
30627 ; AVX512VPOPCNTDQVL: # %bb.0:
30628 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
30629 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46]
30630 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
30631 ; AVX512VPOPCNTDQVL-NEXT: retq
30633 ; BITALG_NOVLX-LABEL: ugt_46_v2i64:
30634 ; BITALG_NOVLX: # %bb.0:
30635 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30636 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
30637 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
30638 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30639 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30640 ; BITALG_NOVLX-NEXT: vzeroupper
30641 ; BITALG_NOVLX-NEXT: retq
30643 ; BITALG-LABEL: ugt_46_v2i64:
30645 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
30646 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
30647 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30648 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [46,46]
30649 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
30650 ; BITALG-NEXT: retq
; IR under test - %2 is the llvm.ctpop.v2i64 result, %3 the per-lane
; "icmp ugt" against 46, and %4 the sign-extended compare result.
30651 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
30652 %3 = icmp ugt <2 x i64> %2, <i64 46, i64 46>
30653 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_47_v2i64 - per-lane population count of a <2 x i64> vector,
; compared unsigned-less-than against a splat of 47; the <2 x i1> compare
; result is sign-extended back to <2 x i64>.
; The assertion lines in this function are autogenerated (see the note on the
; first line of this file), one group per FileCheck prefix used by the RUN lines.
; In every group the expected final compare is a pcmpgtd/vpcmpgtq with the
; splat constant in a register as the greater-than side.
30657 define <2 x i64> @ult_47_v2i64(<2 x i64> %0) {
30658 ; SSE2-LABEL: ult_47_v2i64:
30660 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30661 ; SSE2-NEXT: psrlw $1, %xmm1
30662 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30663 ; SSE2-NEXT: psubb %xmm1, %xmm0
30664 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30665 ; SSE2-NEXT: movdqa %xmm0, %xmm2
30666 ; SSE2-NEXT: pand %xmm1, %xmm2
30667 ; SSE2-NEXT: psrlw $2, %xmm0
30668 ; SSE2-NEXT: pand %xmm1, %xmm0
30669 ; SSE2-NEXT: paddb %xmm2, %xmm0
30670 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30671 ; SSE2-NEXT: psrlw $4, %xmm1
30672 ; SSE2-NEXT: paddb %xmm0, %xmm1
30673 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30674 ; SSE2-NEXT: pxor %xmm0, %xmm0
30675 ; SSE2-NEXT: psadbw %xmm1, %xmm0
30676 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30677 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [47,47,47,47]
30678 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
30681 ; SSE3-LABEL: ult_47_v2i64:
30683 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30684 ; SSE3-NEXT: psrlw $1, %xmm1
30685 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30686 ; SSE3-NEXT: psubb %xmm1, %xmm0
30687 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30688 ; SSE3-NEXT: movdqa %xmm0, %xmm2
30689 ; SSE3-NEXT: pand %xmm1, %xmm2
30690 ; SSE3-NEXT: psrlw $2, %xmm0
30691 ; SSE3-NEXT: pand %xmm1, %xmm0
30692 ; SSE3-NEXT: paddb %xmm2, %xmm0
30693 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30694 ; SSE3-NEXT: psrlw $4, %xmm1
30695 ; SSE3-NEXT: paddb %xmm0, %xmm1
30696 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30697 ; SSE3-NEXT: pxor %xmm0, %xmm0
30698 ; SSE3-NEXT: psadbw %xmm1, %xmm0
30699 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30700 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [47,47,47,47]
30701 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
30704 ; SSSE3-LABEL: ult_47_v2i64:
30706 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30707 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
30708 ; SSSE3-NEXT: pand %xmm1, %xmm2
30709 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30710 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
30711 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
30712 ; SSSE3-NEXT: psrlw $4, %xmm0
30713 ; SSSE3-NEXT: pand %xmm1, %xmm0
30714 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
30715 ; SSSE3-NEXT: paddb %xmm4, %xmm3
30716 ; SSSE3-NEXT: pxor %xmm0, %xmm0
30717 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
30718 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30719 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [47,47,47,47]
30720 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
30723 ; SSE41-LABEL: ult_47_v2i64:
30725 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30726 ; SSE41-NEXT: movdqa %xmm0, %xmm2
30727 ; SSE41-NEXT: pand %xmm1, %xmm2
30728 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30729 ; SSE41-NEXT: movdqa %xmm3, %xmm4
30730 ; SSE41-NEXT: pshufb %xmm2, %xmm4
30731 ; SSE41-NEXT: psrlw $4, %xmm0
30732 ; SSE41-NEXT: pand %xmm1, %xmm0
30733 ; SSE41-NEXT: pshufb %xmm0, %xmm3
30734 ; SSE41-NEXT: paddb %xmm4, %xmm3
30735 ; SSE41-NEXT: pxor %xmm0, %xmm0
30736 ; SSE41-NEXT: psadbw %xmm3, %xmm0
30737 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30738 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [47,47,47,47]
30739 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
30742 ; AVX1-LABEL: ult_47_v2i64:
30744 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30745 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
30746 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30747 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30748 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
30749 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
30750 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30751 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30752 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
30753 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30754 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [47,47]
30755 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30758 ; AVX2-LABEL: ult_47_v2i64:
30760 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30761 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
30762 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30763 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30764 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
30765 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
30766 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30767 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30768 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
30769 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30770 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [47,47]
30771 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30774 ; AVX512VPOPCNTDQ-LABEL: ult_47_v2i64:
30775 ; AVX512VPOPCNTDQ: # %bb.0:
30776 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30777 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
30778 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [47,47]
30779 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30780 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
30781 ; AVX512VPOPCNTDQ-NEXT: retq
30783 ; AVX512VPOPCNTDQVL-LABEL: ult_47_v2i64:
30784 ; AVX512VPOPCNTDQVL: # %bb.0:
30785 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
30786 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47]
30787 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30788 ; AVX512VPOPCNTDQVL-NEXT: retq
30790 ; BITALG_NOVLX-LABEL: ult_47_v2i64:
30791 ; BITALG_NOVLX: # %bb.0:
30792 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30793 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
30794 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
30795 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30796 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [47,47]
30797 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30798 ; BITALG_NOVLX-NEXT: vzeroupper
30799 ; BITALG_NOVLX-NEXT: retq
30801 ; BITALG-LABEL: ult_47_v2i64:
30803 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
30804 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
30805 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30806 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47]
30807 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
30808 ; BITALG-NEXT: retq
; IR under test - %2 is the llvm.ctpop.v2i64 result, %3 the per-lane
; "icmp ult" against 47, and %4 the sign-extended compare result.
30809 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
30810 %3 = icmp ult <2 x i64> %2, <i64 47, i64 47>
30811 %4 = sext <2 x i1> %3 to <2 x i64>
30815 define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) {
30816 ; SSE2-LABEL: ugt_47_v2i64:
30818 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30819 ; SSE2-NEXT: psrlw $1, %xmm1
30820 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30821 ; SSE2-NEXT: psubb %xmm1, %xmm0
30822 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30823 ; SSE2-NEXT: movdqa %xmm0, %xmm2
30824 ; SSE2-NEXT: pand %xmm1, %xmm2
30825 ; SSE2-NEXT: psrlw $2, %xmm0
30826 ; SSE2-NEXT: pand %xmm1, %xmm0
30827 ; SSE2-NEXT: paddb %xmm2, %xmm0
30828 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30829 ; SSE2-NEXT: psrlw $4, %xmm1
30830 ; SSE2-NEXT: paddb %xmm0, %xmm1
30831 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30832 ; SSE2-NEXT: pxor %xmm0, %xmm0
30833 ; SSE2-NEXT: psadbw %xmm1, %xmm0
30834 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
30835 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
30838 ; SSE3-LABEL: ugt_47_v2i64:
30840 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30841 ; SSE3-NEXT: psrlw $1, %xmm1
30842 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30843 ; SSE3-NEXT: psubb %xmm1, %xmm0
30844 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30845 ; SSE3-NEXT: movdqa %xmm0, %xmm2
30846 ; SSE3-NEXT: pand %xmm1, %xmm2
30847 ; SSE3-NEXT: psrlw $2, %xmm0
30848 ; SSE3-NEXT: pand %xmm1, %xmm0
30849 ; SSE3-NEXT: paddb %xmm2, %xmm0
30850 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30851 ; SSE3-NEXT: psrlw $4, %xmm1
30852 ; SSE3-NEXT: paddb %xmm0, %xmm1
30853 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30854 ; SSE3-NEXT: pxor %xmm0, %xmm0
30855 ; SSE3-NEXT: psadbw %xmm1, %xmm0
30856 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
30857 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
30860 ; SSSE3-LABEL: ugt_47_v2i64:
30862 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30863 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
30864 ; SSSE3-NEXT: pand %xmm1, %xmm2
30865 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30866 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
30867 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
30868 ; SSSE3-NEXT: psrlw $4, %xmm0
30869 ; SSSE3-NEXT: pand %xmm1, %xmm0
30870 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
30871 ; SSSE3-NEXT: paddb %xmm4, %xmm3
30872 ; SSSE3-NEXT: pxor %xmm0, %xmm0
30873 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
30874 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
30875 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
30878 ; SSE41-LABEL: ugt_47_v2i64:
30880 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30881 ; SSE41-NEXT: movdqa %xmm0, %xmm2
30882 ; SSE41-NEXT: pand %xmm1, %xmm2
30883 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30884 ; SSE41-NEXT: movdqa %xmm3, %xmm4
30885 ; SSE41-NEXT: pshufb %xmm2, %xmm4
30886 ; SSE41-NEXT: psrlw $4, %xmm0
30887 ; SSE41-NEXT: pand %xmm1, %xmm0
30888 ; SSE41-NEXT: pshufb %xmm0, %xmm3
30889 ; SSE41-NEXT: paddb %xmm4, %xmm3
30890 ; SSE41-NEXT: pxor %xmm0, %xmm0
30891 ; SSE41-NEXT: psadbw %xmm3, %xmm0
30892 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
30893 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
30896 ; AVX1-LABEL: ugt_47_v2i64:
30898 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30899 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
30900 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30901 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30902 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
30903 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
30904 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30905 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30906 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
30907 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30908 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30911 ; AVX2-LABEL: ugt_47_v2i64:
30913 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
30914 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
30915 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
30916 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
30917 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
30918 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
30919 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
30920 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
30921 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
30922 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30923 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30926 ; AVX512VPOPCNTDQ-LABEL: ugt_47_v2i64:
30927 ; AVX512VPOPCNTDQ: # %bb.0:
30928 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30929 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
30930 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30931 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
30932 ; AVX512VPOPCNTDQ-NEXT: retq
30934 ; AVX512VPOPCNTDQVL-LABEL: ugt_47_v2i64:
30935 ; AVX512VPOPCNTDQVL: # %bb.0:
30936 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
30937 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47]
30938 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
30939 ; AVX512VPOPCNTDQVL-NEXT: retq
30941 ; BITALG_NOVLX-LABEL: ugt_47_v2i64:
30942 ; BITALG_NOVLX: # %bb.0:
30943 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30944 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
30945 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
30946 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30947 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30948 ; BITALG_NOVLX-NEXT: vzeroupper
30949 ; BITALG_NOVLX-NEXT: retq
30951 ; BITALG-LABEL: ugt_47_v2i64:
30953 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
30954 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
30955 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
30956 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [47,47]
30957 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
30958 ; BITALG-NEXT: retq
30959 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
30960 %3 = icmp ugt <2 x i64> %2, <i64 47, i64 47>
30961 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_48_v2i64 takes the per-lane population count of its <2 x i64> argument
; (llvm.ctpop.v2i64), tests each count for unsigned-less-than 48, and
; sign-extends the <2 x i1> comparison to a <2 x i64> value. The assertion
; lines that follow are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
30965 define <2 x i64> @ult_48_v2i64(<2 x i64> %0) {
30966 ; SSE2-LABEL: ult_48_v2i64:
30968 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30969 ; SSE2-NEXT: psrlw $1, %xmm1
30970 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30971 ; SSE2-NEXT: psubb %xmm1, %xmm0
30972 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30973 ; SSE2-NEXT: movdqa %xmm0, %xmm2
30974 ; SSE2-NEXT: pand %xmm1, %xmm2
30975 ; SSE2-NEXT: psrlw $2, %xmm0
30976 ; SSE2-NEXT: pand %xmm1, %xmm0
30977 ; SSE2-NEXT: paddb %xmm2, %xmm0
30978 ; SSE2-NEXT: movdqa %xmm0, %xmm1
30979 ; SSE2-NEXT: psrlw $4, %xmm1
30980 ; SSE2-NEXT: paddb %xmm0, %xmm1
30981 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30982 ; SSE2-NEXT: pxor %xmm0, %xmm0
30983 ; SSE2-NEXT: psadbw %xmm1, %xmm0
30984 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
30985 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [48,48,48,48]
30986 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
30989 ; SSE3-LABEL: ult_48_v2i64:
30991 ; SSE3-NEXT: movdqa %xmm0, %xmm1
30992 ; SSE3-NEXT: psrlw $1, %xmm1
30993 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
30994 ; SSE3-NEXT: psubb %xmm1, %xmm0
30995 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
30996 ; SSE3-NEXT: movdqa %xmm0, %xmm2
30997 ; SSE3-NEXT: pand %xmm1, %xmm2
30998 ; SSE3-NEXT: psrlw $2, %xmm0
30999 ; SSE3-NEXT: pand %xmm1, %xmm0
31000 ; SSE3-NEXT: paddb %xmm2, %xmm0
31001 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31002 ; SSE3-NEXT: psrlw $4, %xmm1
31003 ; SSE3-NEXT: paddb %xmm0, %xmm1
31004 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31005 ; SSE3-NEXT: pxor %xmm0, %xmm0
31006 ; SSE3-NEXT: psadbw %xmm1, %xmm0
31007 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31008 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [48,48,48,48]
31009 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
31012 ; SSSE3-LABEL: ult_48_v2i64:
31014 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31015 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
31016 ; SSSE3-NEXT: pand %xmm1, %xmm2
31017 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31018 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
31019 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
31020 ; SSSE3-NEXT: psrlw $4, %xmm0
31021 ; SSSE3-NEXT: pand %xmm1, %xmm0
31022 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
31023 ; SSSE3-NEXT: paddb %xmm4, %xmm3
31024 ; SSSE3-NEXT: pxor %xmm0, %xmm0
31025 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
31026 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31027 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [48,48,48,48]
31028 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
31031 ; SSE41-LABEL: ult_48_v2i64:
31033 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31034 ; SSE41-NEXT: movdqa %xmm0, %xmm2
31035 ; SSE41-NEXT: pand %xmm1, %xmm2
31036 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31037 ; SSE41-NEXT: movdqa %xmm3, %xmm4
31038 ; SSE41-NEXT: pshufb %xmm2, %xmm4
31039 ; SSE41-NEXT: psrlw $4, %xmm0
31040 ; SSE41-NEXT: pand %xmm1, %xmm0
31041 ; SSE41-NEXT: pshufb %xmm0, %xmm3
31042 ; SSE41-NEXT: paddb %xmm4, %xmm3
31043 ; SSE41-NEXT: pxor %xmm0, %xmm0
31044 ; SSE41-NEXT: psadbw %xmm3, %xmm0
31045 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31046 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [48,48,48,48]
31047 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
31050 ; AVX1-LABEL: ult_48_v2i64:
31052 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31053 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
31054 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31055 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31056 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
31057 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
31058 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31059 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31060 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
31061 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31062 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [48,48]
31063 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31066 ; AVX2-LABEL: ult_48_v2i64:
31068 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31069 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
31070 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31071 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31072 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
31073 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
31074 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31075 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31076 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
31077 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31078 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [48,48]
31079 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31082 ; AVX512VPOPCNTDQ-LABEL: ult_48_v2i64:
31083 ; AVX512VPOPCNTDQ: # %bb.0:
31084 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31085 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
31086 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [48,48]
31087 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31088 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
31089 ; AVX512VPOPCNTDQ-NEXT: retq
31091 ; AVX512VPOPCNTDQVL-LABEL: ult_48_v2i64:
31092 ; AVX512VPOPCNTDQVL: # %bb.0:
31093 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
31094 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48]
31095 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31096 ; AVX512VPOPCNTDQVL-NEXT: retq
31098 ; BITALG_NOVLX-LABEL: ult_48_v2i64:
31099 ; BITALG_NOVLX: # %bb.0:
31100 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31101 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
31102 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
31103 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31104 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [48,48]
31105 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31106 ; BITALG_NOVLX-NEXT: vzeroupper
31107 ; BITALG_NOVLX-NEXT: retq
31109 ; BITALG-LABEL: ult_48_v2i64:
31111 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
31112 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
31113 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31114 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48]
31115 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31116 ; BITALG-NEXT: retq
; IR under test. %2 is the per-lane bit count, %3 the lane-wise "count < 48"
; predicate, and %4 widens each i1 to i64 by sign extension.
31117 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
31118 %3 = icmp ult <2 x i64> %2, <i64 48, i64 48>
31119 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_48_v2i64 takes the per-lane population count of its <2 x i64> argument
; (llvm.ctpop.v2i64), tests each count for unsigned-greater-than 48, and
; sign-extends the <2 x i1> comparison to a <2 x i64> value. The assertion
; lines that follow are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
31123 define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) {
31124 ; SSE2-LABEL: ugt_48_v2i64:
31126 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31127 ; SSE2-NEXT: psrlw $1, %xmm1
31128 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31129 ; SSE2-NEXT: psubb %xmm1, %xmm0
31130 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31131 ; SSE2-NEXT: movdqa %xmm0, %xmm2
31132 ; SSE2-NEXT: pand %xmm1, %xmm2
31133 ; SSE2-NEXT: psrlw $2, %xmm0
31134 ; SSE2-NEXT: pand %xmm1, %xmm0
31135 ; SSE2-NEXT: paddb %xmm2, %xmm0
31136 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31137 ; SSE2-NEXT: psrlw $4, %xmm1
31138 ; SSE2-NEXT: paddb %xmm0, %xmm1
31139 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31140 ; SSE2-NEXT: pxor %xmm0, %xmm0
31141 ; SSE2-NEXT: psadbw %xmm1, %xmm0
31142 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
31143 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
31146 ; SSE3-LABEL: ugt_48_v2i64:
31148 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31149 ; SSE3-NEXT: psrlw $1, %xmm1
31150 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31151 ; SSE3-NEXT: psubb %xmm1, %xmm0
31152 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31153 ; SSE3-NEXT: movdqa %xmm0, %xmm2
31154 ; SSE3-NEXT: pand %xmm1, %xmm2
31155 ; SSE3-NEXT: psrlw $2, %xmm0
31156 ; SSE3-NEXT: pand %xmm1, %xmm0
31157 ; SSE3-NEXT: paddb %xmm2, %xmm0
31158 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31159 ; SSE3-NEXT: psrlw $4, %xmm1
31160 ; SSE3-NEXT: paddb %xmm0, %xmm1
31161 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31162 ; SSE3-NEXT: pxor %xmm0, %xmm0
31163 ; SSE3-NEXT: psadbw %xmm1, %xmm0
31164 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
31165 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
31168 ; SSSE3-LABEL: ugt_48_v2i64:
31170 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31171 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
31172 ; SSSE3-NEXT: pand %xmm1, %xmm2
31173 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31174 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
31175 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
31176 ; SSSE3-NEXT: psrlw $4, %xmm0
31177 ; SSSE3-NEXT: pand %xmm1, %xmm0
31178 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
31179 ; SSSE3-NEXT: paddb %xmm4, %xmm3
31180 ; SSSE3-NEXT: pxor %xmm0, %xmm0
31181 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
31182 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
31183 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
31186 ; SSE41-LABEL: ugt_48_v2i64:
31188 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31189 ; SSE41-NEXT: movdqa %xmm0, %xmm2
31190 ; SSE41-NEXT: pand %xmm1, %xmm2
31191 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31192 ; SSE41-NEXT: movdqa %xmm3, %xmm4
31193 ; SSE41-NEXT: pshufb %xmm2, %xmm4
31194 ; SSE41-NEXT: psrlw $4, %xmm0
31195 ; SSE41-NEXT: pand %xmm1, %xmm0
31196 ; SSE41-NEXT: pshufb %xmm0, %xmm3
31197 ; SSE41-NEXT: paddb %xmm4, %xmm3
31198 ; SSE41-NEXT: pxor %xmm0, %xmm0
31199 ; SSE41-NEXT: psadbw %xmm3, %xmm0
31200 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
31201 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
31204 ; AVX1-LABEL: ugt_48_v2i64:
31206 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31207 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
31208 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31209 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31210 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
31211 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
31212 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31213 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31214 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
31215 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31216 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31219 ; AVX2-LABEL: ugt_48_v2i64:
31221 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31222 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
31223 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31224 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31225 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
31226 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
31227 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31228 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31229 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
31230 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31231 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31234 ; AVX512VPOPCNTDQ-LABEL: ugt_48_v2i64:
31235 ; AVX512VPOPCNTDQ: # %bb.0:
31236 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31237 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
31238 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31239 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
31240 ; AVX512VPOPCNTDQ-NEXT: retq
31242 ; AVX512VPOPCNTDQVL-LABEL: ugt_48_v2i64:
31243 ; AVX512VPOPCNTDQVL: # %bb.0:
31244 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
31245 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48]
31246 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
31247 ; AVX512VPOPCNTDQVL-NEXT: retq
31249 ; BITALG_NOVLX-LABEL: ugt_48_v2i64:
31250 ; BITALG_NOVLX: # %bb.0:
31251 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31252 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
31253 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
31254 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31255 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31256 ; BITALG_NOVLX-NEXT: vzeroupper
31257 ; BITALG_NOVLX-NEXT: retq
31259 ; BITALG-LABEL: ugt_48_v2i64:
31261 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
31262 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
31263 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31264 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [48,48]
31265 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
31266 ; BITALG-NEXT: retq
; IR under test. %2 is the per-lane bit count, %3 the lane-wise "count > 48"
; predicate, and %4 widens each i1 to i64 by sign extension.
31267 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
31268 %3 = icmp ugt <2 x i64> %2, <i64 48, i64 48>
31269 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_49_v2i64 takes the per-lane population count of its <2 x i64> argument
; (llvm.ctpop.v2i64), tests each count for unsigned-less-than 49, and
; sign-extends the <2 x i1> comparison to a <2 x i64> value. The assertion
; lines that follow are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
31273 define <2 x i64> @ult_49_v2i64(<2 x i64> %0) {
31274 ; SSE2-LABEL: ult_49_v2i64:
31276 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31277 ; SSE2-NEXT: psrlw $1, %xmm1
31278 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31279 ; SSE2-NEXT: psubb %xmm1, %xmm0
31280 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31281 ; SSE2-NEXT: movdqa %xmm0, %xmm2
31282 ; SSE2-NEXT: pand %xmm1, %xmm2
31283 ; SSE2-NEXT: psrlw $2, %xmm0
31284 ; SSE2-NEXT: pand %xmm1, %xmm0
31285 ; SSE2-NEXT: paddb %xmm2, %xmm0
31286 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31287 ; SSE2-NEXT: psrlw $4, %xmm1
31288 ; SSE2-NEXT: paddb %xmm0, %xmm1
31289 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31290 ; SSE2-NEXT: pxor %xmm0, %xmm0
31291 ; SSE2-NEXT: psadbw %xmm1, %xmm0
31292 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31293 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [49,49,49,49]
31294 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
31297 ; SSE3-LABEL: ult_49_v2i64:
31299 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31300 ; SSE3-NEXT: psrlw $1, %xmm1
31301 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31302 ; SSE3-NEXT: psubb %xmm1, %xmm0
31303 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31304 ; SSE3-NEXT: movdqa %xmm0, %xmm2
31305 ; SSE3-NEXT: pand %xmm1, %xmm2
31306 ; SSE3-NEXT: psrlw $2, %xmm0
31307 ; SSE3-NEXT: pand %xmm1, %xmm0
31308 ; SSE3-NEXT: paddb %xmm2, %xmm0
31309 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31310 ; SSE3-NEXT: psrlw $4, %xmm1
31311 ; SSE3-NEXT: paddb %xmm0, %xmm1
31312 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31313 ; SSE3-NEXT: pxor %xmm0, %xmm0
31314 ; SSE3-NEXT: psadbw %xmm1, %xmm0
31315 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31316 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [49,49,49,49]
31317 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
31320 ; SSSE3-LABEL: ult_49_v2i64:
31322 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31323 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
31324 ; SSSE3-NEXT: pand %xmm1, %xmm2
31325 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31326 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
31327 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
31328 ; SSSE3-NEXT: psrlw $4, %xmm0
31329 ; SSSE3-NEXT: pand %xmm1, %xmm0
31330 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
31331 ; SSSE3-NEXT: paddb %xmm4, %xmm3
31332 ; SSSE3-NEXT: pxor %xmm0, %xmm0
31333 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
31334 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31335 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [49,49,49,49]
31336 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
31339 ; SSE41-LABEL: ult_49_v2i64:
31341 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31342 ; SSE41-NEXT: movdqa %xmm0, %xmm2
31343 ; SSE41-NEXT: pand %xmm1, %xmm2
31344 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31345 ; SSE41-NEXT: movdqa %xmm3, %xmm4
31346 ; SSE41-NEXT: pshufb %xmm2, %xmm4
31347 ; SSE41-NEXT: psrlw $4, %xmm0
31348 ; SSE41-NEXT: pand %xmm1, %xmm0
31349 ; SSE41-NEXT: pshufb %xmm0, %xmm3
31350 ; SSE41-NEXT: paddb %xmm4, %xmm3
31351 ; SSE41-NEXT: pxor %xmm0, %xmm0
31352 ; SSE41-NEXT: psadbw %xmm3, %xmm0
31353 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31354 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [49,49,49,49]
31355 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
31358 ; AVX1-LABEL: ult_49_v2i64:
31360 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31361 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
31362 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31363 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31364 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
31365 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
31366 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31367 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31368 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
31369 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31370 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [49,49]
31371 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31374 ; AVX2-LABEL: ult_49_v2i64:
31376 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31377 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
31378 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31379 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31380 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
31381 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
31382 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31383 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31384 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
31385 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31386 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [49,49]
31387 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31390 ; AVX512VPOPCNTDQ-LABEL: ult_49_v2i64:
31391 ; AVX512VPOPCNTDQ: # %bb.0:
31392 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31393 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
31394 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [49,49]
31395 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31396 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
31397 ; AVX512VPOPCNTDQ-NEXT: retq
31399 ; AVX512VPOPCNTDQVL-LABEL: ult_49_v2i64:
31400 ; AVX512VPOPCNTDQVL: # %bb.0:
31401 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
31402 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49]
31403 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31404 ; AVX512VPOPCNTDQVL-NEXT: retq
31406 ; BITALG_NOVLX-LABEL: ult_49_v2i64:
31407 ; BITALG_NOVLX: # %bb.0:
31408 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31409 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
31410 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
31411 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31412 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [49,49]
31413 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31414 ; BITALG_NOVLX-NEXT: vzeroupper
31415 ; BITALG_NOVLX-NEXT: retq
31417 ; BITALG-LABEL: ult_49_v2i64:
31419 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
31420 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
31421 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31422 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49]
31423 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31424 ; BITALG-NEXT: retq
; IR under test. %2 is the per-lane bit count, %3 the lane-wise "count < 49"
; predicate, and %4 widens each i1 to i64 by sign extension.
31425 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
31426 %3 = icmp ult <2 x i64> %2, <i64 49, i64 49>
31427 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_49_v2i64 takes the per-lane population count of its <2 x i64> argument
; (llvm.ctpop.v2i64), tests each count for unsigned-greater-than 49, and
; sign-extends the <2 x i1> comparison to a <2 x i64> value. The assertion
; lines that follow are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
31431 define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) {
31432 ; SSE2-LABEL: ugt_49_v2i64:
31434 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31435 ; SSE2-NEXT: psrlw $1, %xmm1
31436 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31437 ; SSE2-NEXT: psubb %xmm1, %xmm0
31438 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31439 ; SSE2-NEXT: movdqa %xmm0, %xmm2
31440 ; SSE2-NEXT: pand %xmm1, %xmm2
31441 ; SSE2-NEXT: psrlw $2, %xmm0
31442 ; SSE2-NEXT: pand %xmm1, %xmm0
31443 ; SSE2-NEXT: paddb %xmm2, %xmm0
31444 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31445 ; SSE2-NEXT: psrlw $4, %xmm1
31446 ; SSE2-NEXT: paddb %xmm0, %xmm1
31447 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31448 ; SSE2-NEXT: pxor %xmm0, %xmm0
31449 ; SSE2-NEXT: psadbw %xmm1, %xmm0
31450 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
31451 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
31454 ; SSE3-LABEL: ugt_49_v2i64:
31456 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31457 ; SSE3-NEXT: psrlw $1, %xmm1
31458 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31459 ; SSE3-NEXT: psubb %xmm1, %xmm0
31460 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31461 ; SSE3-NEXT: movdqa %xmm0, %xmm2
31462 ; SSE3-NEXT: pand %xmm1, %xmm2
31463 ; SSE3-NEXT: psrlw $2, %xmm0
31464 ; SSE3-NEXT: pand %xmm1, %xmm0
31465 ; SSE3-NEXT: paddb %xmm2, %xmm0
31466 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31467 ; SSE3-NEXT: psrlw $4, %xmm1
31468 ; SSE3-NEXT: paddb %xmm0, %xmm1
31469 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31470 ; SSE3-NEXT: pxor %xmm0, %xmm0
31471 ; SSE3-NEXT: psadbw %xmm1, %xmm0
31472 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
31473 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
31476 ; SSSE3-LABEL: ugt_49_v2i64:
31478 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31479 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
31480 ; SSSE3-NEXT: pand %xmm1, %xmm2
31481 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31482 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
31483 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
31484 ; SSSE3-NEXT: psrlw $4, %xmm0
31485 ; SSSE3-NEXT: pand %xmm1, %xmm0
31486 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
31487 ; SSSE3-NEXT: paddb %xmm4, %xmm3
31488 ; SSSE3-NEXT: pxor %xmm0, %xmm0
31489 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
31490 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
31491 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
31494 ; SSE41-LABEL: ugt_49_v2i64:
31496 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31497 ; SSE41-NEXT: movdqa %xmm0, %xmm2
31498 ; SSE41-NEXT: pand %xmm1, %xmm2
31499 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31500 ; SSE41-NEXT: movdqa %xmm3, %xmm4
31501 ; SSE41-NEXT: pshufb %xmm2, %xmm4
31502 ; SSE41-NEXT: psrlw $4, %xmm0
31503 ; SSE41-NEXT: pand %xmm1, %xmm0
31504 ; SSE41-NEXT: pshufb %xmm0, %xmm3
31505 ; SSE41-NEXT: paddb %xmm4, %xmm3
31506 ; SSE41-NEXT: pxor %xmm0, %xmm0
31507 ; SSE41-NEXT: psadbw %xmm3, %xmm0
31508 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
31509 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
31512 ; AVX1-LABEL: ugt_49_v2i64:
31514 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31515 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
31516 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31517 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31518 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
31519 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
31520 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31521 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31522 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
31523 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31524 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31527 ; AVX2-LABEL: ugt_49_v2i64:
31529 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31530 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
31531 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31532 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31533 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
31534 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
31535 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31536 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31537 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
31538 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31539 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31542 ; AVX512VPOPCNTDQ-LABEL: ugt_49_v2i64:
31543 ; AVX512VPOPCNTDQ: # %bb.0:
31544 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31545 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
31546 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31547 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
31548 ; AVX512VPOPCNTDQ-NEXT: retq
31550 ; AVX512VPOPCNTDQVL-LABEL: ugt_49_v2i64:
31551 ; AVX512VPOPCNTDQVL: # %bb.0:
31552 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
31553 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49]
31554 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
31555 ; AVX512VPOPCNTDQVL-NEXT: retq
31557 ; BITALG_NOVLX-LABEL: ugt_49_v2i64:
31558 ; BITALG_NOVLX: # %bb.0:
31559 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31560 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
31561 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
31562 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31563 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31564 ; BITALG_NOVLX-NEXT: vzeroupper
31565 ; BITALG_NOVLX-NEXT: retq
31567 ; BITALG-LABEL: ugt_49_v2i64:
31569 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
31570 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
31571 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31572 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [49,49]
31573 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
31574 ; BITALG-NEXT: retq
; IR under test. %2 is the per-lane bit count, %3 the lane-wise "count > 49"
; predicate, and %4 widens each i1 to i64 by sign extension.
31575 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
31576 %3 = icmp ugt <2 x i64> %2, <i64 49, i64 49>
31577 %4 = sext <2 x i1> %3 to <2 x i64>
31581 define <2 x i64> @ult_50_v2i64(<2 x i64> %0) {
31582 ; SSE2-LABEL: ult_50_v2i64:
31584 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31585 ; SSE2-NEXT: psrlw $1, %xmm1
31586 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31587 ; SSE2-NEXT: psubb %xmm1, %xmm0
31588 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31589 ; SSE2-NEXT: movdqa %xmm0, %xmm2
31590 ; SSE2-NEXT: pand %xmm1, %xmm2
31591 ; SSE2-NEXT: psrlw $2, %xmm0
31592 ; SSE2-NEXT: pand %xmm1, %xmm0
31593 ; SSE2-NEXT: paddb %xmm2, %xmm0
31594 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31595 ; SSE2-NEXT: psrlw $4, %xmm1
31596 ; SSE2-NEXT: paddb %xmm0, %xmm1
31597 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31598 ; SSE2-NEXT: pxor %xmm0, %xmm0
31599 ; SSE2-NEXT: psadbw %xmm1, %xmm0
31600 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31601 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [50,50,50,50]
31602 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
31605 ; SSE3-LABEL: ult_50_v2i64:
31607 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31608 ; SSE3-NEXT: psrlw $1, %xmm1
31609 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31610 ; SSE3-NEXT: psubb %xmm1, %xmm0
31611 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31612 ; SSE3-NEXT: movdqa %xmm0, %xmm2
31613 ; SSE3-NEXT: pand %xmm1, %xmm2
31614 ; SSE3-NEXT: psrlw $2, %xmm0
31615 ; SSE3-NEXT: pand %xmm1, %xmm0
31616 ; SSE3-NEXT: paddb %xmm2, %xmm0
31617 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31618 ; SSE3-NEXT: psrlw $4, %xmm1
31619 ; SSE3-NEXT: paddb %xmm0, %xmm1
31620 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31621 ; SSE3-NEXT: pxor %xmm0, %xmm0
31622 ; SSE3-NEXT: psadbw %xmm1, %xmm0
31623 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31624 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [50,50,50,50]
31625 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
31628 ; SSSE3-LABEL: ult_50_v2i64:
31630 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31631 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
31632 ; SSSE3-NEXT: pand %xmm1, %xmm2
31633 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31634 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
31635 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
31636 ; SSSE3-NEXT: psrlw $4, %xmm0
31637 ; SSSE3-NEXT: pand %xmm1, %xmm0
31638 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
31639 ; SSSE3-NEXT: paddb %xmm4, %xmm3
31640 ; SSSE3-NEXT: pxor %xmm0, %xmm0
31641 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
31642 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31643 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [50,50,50,50]
31644 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
31647 ; SSE41-LABEL: ult_50_v2i64:
31649 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31650 ; SSE41-NEXT: movdqa %xmm0, %xmm2
31651 ; SSE41-NEXT: pand %xmm1, %xmm2
31652 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31653 ; SSE41-NEXT: movdqa %xmm3, %xmm4
31654 ; SSE41-NEXT: pshufb %xmm2, %xmm4
31655 ; SSE41-NEXT: psrlw $4, %xmm0
31656 ; SSE41-NEXT: pand %xmm1, %xmm0
31657 ; SSE41-NEXT: pshufb %xmm0, %xmm3
31658 ; SSE41-NEXT: paddb %xmm4, %xmm3
31659 ; SSE41-NEXT: pxor %xmm0, %xmm0
31660 ; SSE41-NEXT: psadbw %xmm3, %xmm0
31661 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31662 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [50,50,50,50]
31663 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
31666 ; AVX1-LABEL: ult_50_v2i64:
31668 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31669 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
31670 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31671 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31672 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
31673 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
31674 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31675 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31676 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
31677 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31678 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [50,50]
31679 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31682 ; AVX2-LABEL: ult_50_v2i64:
31684 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31685 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
31686 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31687 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31688 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
31689 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
31690 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31691 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31692 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
31693 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31694 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [50,50]
31695 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31698 ; AVX512VPOPCNTDQ-LABEL: ult_50_v2i64:
31699 ; AVX512VPOPCNTDQ: # %bb.0:
31700 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31701 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
31702 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [50,50]
31703 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31704 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
31705 ; AVX512VPOPCNTDQ-NEXT: retq
31707 ; AVX512VPOPCNTDQVL-LABEL: ult_50_v2i64:
31708 ; AVX512VPOPCNTDQVL: # %bb.0:
31709 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
31710 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50]
31711 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31712 ; AVX512VPOPCNTDQVL-NEXT: retq
31714 ; BITALG_NOVLX-LABEL: ult_50_v2i64:
31715 ; BITALG_NOVLX: # %bb.0:
31716 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31717 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
31718 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
31719 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31720 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [50,50]
31721 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31722 ; BITALG_NOVLX-NEXT: vzeroupper
31723 ; BITALG_NOVLX-NEXT: retq
31725 ; BITALG-LABEL: ult_50_v2i64:
31727 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
31728 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
31729 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31730 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50]
31731 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31732 ; BITALG-NEXT: retq
31733 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
31734 %3 = icmp ult <2 x i64> %2, <i64 50, i64 50>
31735 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_50_v2i64 - takes the per-lane population count of a <2 x i64> input
; with llvm.ctpop.v2i64, compares it with icmp ugt against the splat constant 50,
; and sign-extends the <2 x i1> compare result to <2 x i64>.
; The prefixed assertion lines below are autogenerated (see the NOTE on the first
; line of this file) - regenerate them with the script rather than editing by hand.
31739 define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) {
31740 ; SSE2-LABEL: ugt_50_v2i64:
31742 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31743 ; SSE2-NEXT: psrlw $1, %xmm1
31744 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31745 ; SSE2-NEXT: psubb %xmm1, %xmm0
31746 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31747 ; SSE2-NEXT: movdqa %xmm0, %xmm2
31748 ; SSE2-NEXT: pand %xmm1, %xmm2
31749 ; SSE2-NEXT: psrlw $2, %xmm0
31750 ; SSE2-NEXT: pand %xmm1, %xmm0
31751 ; SSE2-NEXT: paddb %xmm2, %xmm0
31752 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31753 ; SSE2-NEXT: psrlw $4, %xmm1
31754 ; SSE2-NEXT: paddb %xmm0, %xmm1
31755 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31756 ; SSE2-NEXT: pxor %xmm0, %xmm0
31757 ; SSE2-NEXT: psadbw %xmm1, %xmm0
31758 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
31759 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
31762 ; SSE3-LABEL: ugt_50_v2i64:
31764 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31765 ; SSE3-NEXT: psrlw $1, %xmm1
31766 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31767 ; SSE3-NEXT: psubb %xmm1, %xmm0
31768 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31769 ; SSE3-NEXT: movdqa %xmm0, %xmm2
31770 ; SSE3-NEXT: pand %xmm1, %xmm2
31771 ; SSE3-NEXT: psrlw $2, %xmm0
31772 ; SSE3-NEXT: pand %xmm1, %xmm0
31773 ; SSE3-NEXT: paddb %xmm2, %xmm0
31774 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31775 ; SSE3-NEXT: psrlw $4, %xmm1
31776 ; SSE3-NEXT: paddb %xmm0, %xmm1
31777 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31778 ; SSE3-NEXT: pxor %xmm0, %xmm0
31779 ; SSE3-NEXT: psadbw %xmm1, %xmm0
31780 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
31781 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
31784 ; SSSE3-LABEL: ugt_50_v2i64:
31786 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31787 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
31788 ; SSSE3-NEXT: pand %xmm1, %xmm2
31789 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31790 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
31791 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
31792 ; SSSE3-NEXT: psrlw $4, %xmm0
31793 ; SSSE3-NEXT: pand %xmm1, %xmm0
31794 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
31795 ; SSSE3-NEXT: paddb %xmm4, %xmm3
31796 ; SSSE3-NEXT: pxor %xmm0, %xmm0
31797 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
31798 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
31799 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
31802 ; SSE41-LABEL: ugt_50_v2i64:
31804 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31805 ; SSE41-NEXT: movdqa %xmm0, %xmm2
31806 ; SSE41-NEXT: pand %xmm1, %xmm2
31807 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31808 ; SSE41-NEXT: movdqa %xmm3, %xmm4
31809 ; SSE41-NEXT: pshufb %xmm2, %xmm4
31810 ; SSE41-NEXT: psrlw $4, %xmm0
31811 ; SSE41-NEXT: pand %xmm1, %xmm0
31812 ; SSE41-NEXT: pshufb %xmm0, %xmm3
31813 ; SSE41-NEXT: paddb %xmm4, %xmm3
31814 ; SSE41-NEXT: pxor %xmm0, %xmm0
31815 ; SSE41-NEXT: psadbw %xmm3, %xmm0
31816 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
31817 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
31820 ; AVX1-LABEL: ugt_50_v2i64:
31822 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31823 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
31824 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31825 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31826 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
31827 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
31828 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31829 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31830 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
31831 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31832 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31835 ; AVX2-LABEL: ugt_50_v2i64:
31837 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31838 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
31839 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31840 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31841 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
31842 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
31843 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31844 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31845 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
31846 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31847 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31850 ; AVX512VPOPCNTDQ-LABEL: ugt_50_v2i64:
31851 ; AVX512VPOPCNTDQ: # %bb.0:
31852 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31853 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
31854 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31855 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
31856 ; AVX512VPOPCNTDQ-NEXT: retq
31858 ; AVX512VPOPCNTDQVL-LABEL: ugt_50_v2i64:
31859 ; AVX512VPOPCNTDQVL: # %bb.0:
31860 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
31861 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50]
31862 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
31863 ; AVX512VPOPCNTDQVL-NEXT: retq
31865 ; BITALG_NOVLX-LABEL: ugt_50_v2i64:
31866 ; BITALG_NOVLX: # %bb.0:
31867 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
31868 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
31869 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
31870 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31871 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
31872 ; BITALG_NOVLX-NEXT: vzeroupper
31873 ; BITALG_NOVLX-NEXT: retq
31875 ; BITALG-LABEL: ugt_50_v2i64:
31877 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
31878 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
31879 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31880 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [50,50]
31881 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
31882 ; BITALG-NEXT: retq
31883 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
31884 %3 = icmp ugt <2 x i64> %2, <i64 50, i64 50>
31885 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_51_v2i64 - takes the per-lane population count of a <2 x i64> input
; with llvm.ctpop.v2i64, compares it with icmp ult against the splat constant 51,
; and sign-extends the <2 x i1> compare result to <2 x i64>.
; The prefixed assertion lines below are autogenerated (see the NOTE on the first
; line of this file) - regenerate them with the script rather than editing by hand.
31889 define <2 x i64> @ult_51_v2i64(<2 x i64> %0) {
31890 ; SSE2-LABEL: ult_51_v2i64:
31892 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31893 ; SSE2-NEXT: psrlw $1, %xmm1
31894 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31895 ; SSE2-NEXT: psubb %xmm1, %xmm0
31896 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31897 ; SSE2-NEXT: movdqa %xmm0, %xmm2
31898 ; SSE2-NEXT: pand %xmm1, %xmm2
31899 ; SSE2-NEXT: psrlw $2, %xmm0
31900 ; SSE2-NEXT: pand %xmm1, %xmm0
31901 ; SSE2-NEXT: paddb %xmm2, %xmm0
31902 ; SSE2-NEXT: movdqa %xmm0, %xmm1
31903 ; SSE2-NEXT: psrlw $4, %xmm1
31904 ; SSE2-NEXT: paddb %xmm0, %xmm1
31905 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31906 ; SSE2-NEXT: pxor %xmm0, %xmm0
31907 ; SSE2-NEXT: psadbw %xmm1, %xmm0
31908 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31909 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51]
31910 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
31913 ; SSE3-LABEL: ult_51_v2i64:
31915 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31916 ; SSE3-NEXT: psrlw $1, %xmm1
31917 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31918 ; SSE3-NEXT: psubb %xmm1, %xmm0
31919 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
31920 ; SSE3-NEXT: movdqa %xmm0, %xmm2
31921 ; SSE3-NEXT: pand %xmm1, %xmm2
31922 ; SSE3-NEXT: psrlw $2, %xmm0
31923 ; SSE3-NEXT: pand %xmm1, %xmm0
31924 ; SSE3-NEXT: paddb %xmm2, %xmm0
31925 ; SSE3-NEXT: movdqa %xmm0, %xmm1
31926 ; SSE3-NEXT: psrlw $4, %xmm1
31927 ; SSE3-NEXT: paddb %xmm0, %xmm1
31928 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
31929 ; SSE3-NEXT: pxor %xmm0, %xmm0
31930 ; SSE3-NEXT: psadbw %xmm1, %xmm0
31931 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31932 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51]
31933 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
31936 ; SSSE3-LABEL: ult_51_v2i64:
31938 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31939 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
31940 ; SSSE3-NEXT: pand %xmm1, %xmm2
31941 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31942 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
31943 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
31944 ; SSSE3-NEXT: psrlw $4, %xmm0
31945 ; SSSE3-NEXT: pand %xmm1, %xmm0
31946 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
31947 ; SSSE3-NEXT: paddb %xmm4, %xmm3
31948 ; SSSE3-NEXT: pxor %xmm0, %xmm0
31949 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
31950 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31951 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51]
31952 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
31955 ; SSE41-LABEL: ult_51_v2i64:
31957 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31958 ; SSE41-NEXT: movdqa %xmm0, %xmm2
31959 ; SSE41-NEXT: pand %xmm1, %xmm2
31960 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31961 ; SSE41-NEXT: movdqa %xmm3, %xmm4
31962 ; SSE41-NEXT: pshufb %xmm2, %xmm4
31963 ; SSE41-NEXT: psrlw $4, %xmm0
31964 ; SSE41-NEXT: pand %xmm1, %xmm0
31965 ; SSE41-NEXT: pshufb %xmm0, %xmm3
31966 ; SSE41-NEXT: paddb %xmm4, %xmm3
31967 ; SSE41-NEXT: pxor %xmm0, %xmm0
31968 ; SSE41-NEXT: psadbw %xmm3, %xmm0
31969 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
31970 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [51,51,51,51]
31971 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
31974 ; AVX1-LABEL: ult_51_v2i64:
31976 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31977 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
31978 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31979 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31980 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
31981 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
31982 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31983 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
31984 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
31985 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
31986 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [51,51]
31987 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
31990 ; AVX2-LABEL: ult_51_v2i64:
31992 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
31993 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
31994 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
31995 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
31996 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
31997 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
31998 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
31999 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32000 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
32001 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32002 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [51,51]
32003 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32006 ; AVX512VPOPCNTDQ-LABEL: ult_51_v2i64:
32007 ; AVX512VPOPCNTDQ: # %bb.0:
32008 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32009 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
32010 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [51,51]
32011 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32012 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
32013 ; AVX512VPOPCNTDQ-NEXT: retq
32015 ; AVX512VPOPCNTDQVL-LABEL: ult_51_v2i64:
32016 ; AVX512VPOPCNTDQVL: # %bb.0:
32017 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
32018 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51]
32019 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32020 ; AVX512VPOPCNTDQVL-NEXT: retq
32022 ; BITALG_NOVLX-LABEL: ult_51_v2i64:
32023 ; BITALG_NOVLX: # %bb.0:
32024 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32025 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
32026 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
32027 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32028 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [51,51]
32029 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32030 ; BITALG_NOVLX-NEXT: vzeroupper
32031 ; BITALG_NOVLX-NEXT: retq
32033 ; BITALG-LABEL: ult_51_v2i64:
32035 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
32036 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
32037 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32038 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51]
32039 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32040 ; BITALG-NEXT: retq
32041 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
32042 %3 = icmp ult <2 x i64> %2, <i64 51, i64 51>
32043 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_51_v2i64 - takes the per-lane population count of a <2 x i64> input
; with llvm.ctpop.v2i64, compares it with icmp ugt against the splat constant 51,
; and sign-extends the <2 x i1> compare result to <2 x i64>.
; The prefixed assertion lines below are autogenerated (see the NOTE on the first
; line of this file) - regenerate them with the script rather than editing by hand.
32047 define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) {
32048 ; SSE2-LABEL: ugt_51_v2i64:
32050 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32051 ; SSE2-NEXT: psrlw $1, %xmm1
32052 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32053 ; SSE2-NEXT: psubb %xmm1, %xmm0
32054 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32055 ; SSE2-NEXT: movdqa %xmm0, %xmm2
32056 ; SSE2-NEXT: pand %xmm1, %xmm2
32057 ; SSE2-NEXT: psrlw $2, %xmm0
32058 ; SSE2-NEXT: pand %xmm1, %xmm0
32059 ; SSE2-NEXT: paddb %xmm2, %xmm0
32060 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32061 ; SSE2-NEXT: psrlw $4, %xmm1
32062 ; SSE2-NEXT: paddb %xmm0, %xmm1
32063 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32064 ; SSE2-NEXT: pxor %xmm0, %xmm0
32065 ; SSE2-NEXT: psadbw %xmm1, %xmm0
32066 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
32067 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
32070 ; SSE3-LABEL: ugt_51_v2i64:
32072 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32073 ; SSE3-NEXT: psrlw $1, %xmm1
32074 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32075 ; SSE3-NEXT: psubb %xmm1, %xmm0
32076 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32077 ; SSE3-NEXT: movdqa %xmm0, %xmm2
32078 ; SSE3-NEXT: pand %xmm1, %xmm2
32079 ; SSE3-NEXT: psrlw $2, %xmm0
32080 ; SSE3-NEXT: pand %xmm1, %xmm0
32081 ; SSE3-NEXT: paddb %xmm2, %xmm0
32082 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32083 ; SSE3-NEXT: psrlw $4, %xmm1
32084 ; SSE3-NEXT: paddb %xmm0, %xmm1
32085 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32086 ; SSE3-NEXT: pxor %xmm0, %xmm0
32087 ; SSE3-NEXT: psadbw %xmm1, %xmm0
32088 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
32089 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
32092 ; SSSE3-LABEL: ugt_51_v2i64:
32094 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32095 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
32096 ; SSSE3-NEXT: pand %xmm1, %xmm2
32097 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32098 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
32099 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
32100 ; SSSE3-NEXT: psrlw $4, %xmm0
32101 ; SSSE3-NEXT: pand %xmm1, %xmm0
32102 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
32103 ; SSSE3-NEXT: paddb %xmm4, %xmm3
32104 ; SSSE3-NEXT: pxor %xmm0, %xmm0
32105 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
32106 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
32107 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
32110 ; SSE41-LABEL: ugt_51_v2i64:
32112 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32113 ; SSE41-NEXT: movdqa %xmm0, %xmm2
32114 ; SSE41-NEXT: pand %xmm1, %xmm2
32115 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32116 ; SSE41-NEXT: movdqa %xmm3, %xmm4
32117 ; SSE41-NEXT: pshufb %xmm2, %xmm4
32118 ; SSE41-NEXT: psrlw $4, %xmm0
32119 ; SSE41-NEXT: pand %xmm1, %xmm0
32120 ; SSE41-NEXT: pshufb %xmm0, %xmm3
32121 ; SSE41-NEXT: paddb %xmm4, %xmm3
32122 ; SSE41-NEXT: pxor %xmm0, %xmm0
32123 ; SSE41-NEXT: psadbw %xmm3, %xmm0
32124 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
32125 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
32128 ; AVX1-LABEL: ugt_51_v2i64:
32130 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32131 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
32132 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32133 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32134 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
32135 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
32136 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32137 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32138 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
32139 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32140 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32143 ; AVX2-LABEL: ugt_51_v2i64:
32145 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32146 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
32147 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32148 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32149 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
32150 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
32151 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32152 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32153 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
32154 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32155 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32158 ; AVX512VPOPCNTDQ-LABEL: ugt_51_v2i64:
32159 ; AVX512VPOPCNTDQ: # %bb.0:
32160 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32161 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
32162 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32163 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
32164 ; AVX512VPOPCNTDQ-NEXT: retq
32166 ; AVX512VPOPCNTDQVL-LABEL: ugt_51_v2i64:
32167 ; AVX512VPOPCNTDQVL: # %bb.0:
32168 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
32169 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51]
32170 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
32171 ; AVX512VPOPCNTDQVL-NEXT: retq
32173 ; BITALG_NOVLX-LABEL: ugt_51_v2i64:
32174 ; BITALG_NOVLX: # %bb.0:
32175 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32176 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
32177 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
32178 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32179 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32180 ; BITALG_NOVLX-NEXT: vzeroupper
32181 ; BITALG_NOVLX-NEXT: retq
32183 ; BITALG-LABEL: ugt_51_v2i64:
32185 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
32186 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
32187 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32188 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [51,51]
32189 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
32190 ; BITALG-NEXT: retq
32191 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
32192 %3 = icmp ugt <2 x i64> %2, <i64 51, i64 51>
32193 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_52_v2i64 - takes the per-lane population count of a <2 x i64> input
; with llvm.ctpop.v2i64, compares it with icmp ult against the splat constant 52,
; and sign-extends the <2 x i1> compare result to <2 x i64>.
; The prefixed assertion lines below are autogenerated (see the NOTE on the first
; line of this file) - regenerate them with the script rather than editing by hand.
32197 define <2 x i64> @ult_52_v2i64(<2 x i64> %0) {
32198 ; SSE2-LABEL: ult_52_v2i64:
32200 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32201 ; SSE2-NEXT: psrlw $1, %xmm1
32202 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32203 ; SSE2-NEXT: psubb %xmm1, %xmm0
32204 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32205 ; SSE2-NEXT: movdqa %xmm0, %xmm2
32206 ; SSE2-NEXT: pand %xmm1, %xmm2
32207 ; SSE2-NEXT: psrlw $2, %xmm0
32208 ; SSE2-NEXT: pand %xmm1, %xmm0
32209 ; SSE2-NEXT: paddb %xmm2, %xmm0
32210 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32211 ; SSE2-NEXT: psrlw $4, %xmm1
32212 ; SSE2-NEXT: paddb %xmm0, %xmm1
32213 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32214 ; SSE2-NEXT: pxor %xmm0, %xmm0
32215 ; SSE2-NEXT: psadbw %xmm1, %xmm0
32216 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32217 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [52,52,52,52]
32218 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
32221 ; SSE3-LABEL: ult_52_v2i64:
32223 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32224 ; SSE3-NEXT: psrlw $1, %xmm1
32225 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32226 ; SSE3-NEXT: psubb %xmm1, %xmm0
32227 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32228 ; SSE3-NEXT: movdqa %xmm0, %xmm2
32229 ; SSE3-NEXT: pand %xmm1, %xmm2
32230 ; SSE3-NEXT: psrlw $2, %xmm0
32231 ; SSE3-NEXT: pand %xmm1, %xmm0
32232 ; SSE3-NEXT: paddb %xmm2, %xmm0
32233 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32234 ; SSE3-NEXT: psrlw $4, %xmm1
32235 ; SSE3-NEXT: paddb %xmm0, %xmm1
32236 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32237 ; SSE3-NEXT: pxor %xmm0, %xmm0
32238 ; SSE3-NEXT: psadbw %xmm1, %xmm0
32239 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32240 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [52,52,52,52]
32241 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
32244 ; SSSE3-LABEL: ult_52_v2i64:
32246 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32247 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
32248 ; SSSE3-NEXT: pand %xmm1, %xmm2
32249 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32250 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
32251 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
32252 ; SSSE3-NEXT: psrlw $4, %xmm0
32253 ; SSSE3-NEXT: pand %xmm1, %xmm0
32254 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
32255 ; SSSE3-NEXT: paddb %xmm4, %xmm3
32256 ; SSSE3-NEXT: pxor %xmm0, %xmm0
32257 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
32258 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32259 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [52,52,52,52]
32260 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
32263 ; SSE41-LABEL: ult_52_v2i64:
32265 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32266 ; SSE41-NEXT: movdqa %xmm0, %xmm2
32267 ; SSE41-NEXT: pand %xmm1, %xmm2
32268 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32269 ; SSE41-NEXT: movdqa %xmm3, %xmm4
32270 ; SSE41-NEXT: pshufb %xmm2, %xmm4
32271 ; SSE41-NEXT: psrlw $4, %xmm0
32272 ; SSE41-NEXT: pand %xmm1, %xmm0
32273 ; SSE41-NEXT: pshufb %xmm0, %xmm3
32274 ; SSE41-NEXT: paddb %xmm4, %xmm3
32275 ; SSE41-NEXT: pxor %xmm0, %xmm0
32276 ; SSE41-NEXT: psadbw %xmm3, %xmm0
32277 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32278 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [52,52,52,52]
32279 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
32282 ; AVX1-LABEL: ult_52_v2i64:
32284 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32285 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
32286 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32287 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32288 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
32289 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
32290 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32291 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32292 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
32293 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32294 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [52,52]
32295 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32298 ; AVX2-LABEL: ult_52_v2i64:
32300 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32301 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
32302 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32303 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32304 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
32305 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
32306 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32307 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32308 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
32309 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32310 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [52,52]
32311 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32314 ; AVX512VPOPCNTDQ-LABEL: ult_52_v2i64:
32315 ; AVX512VPOPCNTDQ: # %bb.0:
32316 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32317 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
32318 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [52,52]
32319 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32320 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
32321 ; AVX512VPOPCNTDQ-NEXT: retq
32323 ; AVX512VPOPCNTDQVL-LABEL: ult_52_v2i64:
32324 ; AVX512VPOPCNTDQVL: # %bb.0:
32325 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
32326 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52]
32327 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32328 ; AVX512VPOPCNTDQVL-NEXT: retq
32330 ; BITALG_NOVLX-LABEL: ult_52_v2i64:
32331 ; BITALG_NOVLX: # %bb.0:
32332 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32333 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
32334 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
32335 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32336 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [52,52]
32337 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32338 ; BITALG_NOVLX-NEXT: vzeroupper
32339 ; BITALG_NOVLX-NEXT: retq
32341 ; BITALG-LABEL: ult_52_v2i64:
32343 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
32344 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
32345 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32346 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52]
32347 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32348 ; BITALG-NEXT: retq
32349 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
32350 %3 = icmp ult <2 x i64> %2, <i64 52, i64 52>
32351 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_52_v2i64 - takes the per-lane population count of a <2 x i64> input
; with llvm.ctpop.v2i64, compares it with icmp ugt against the splat constant 52,
; and sign-extends the <2 x i1> compare result to <2 x i64>.
; The prefixed assertion lines below are autogenerated (see the NOTE on the first
; line of this file) - regenerate them with the script rather than editing by hand.
32355 define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) {
32356 ; SSE2-LABEL: ugt_52_v2i64:
32358 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32359 ; SSE2-NEXT: psrlw $1, %xmm1
32360 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32361 ; SSE2-NEXT: psubb %xmm1, %xmm0
32362 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32363 ; SSE2-NEXT: movdqa %xmm0, %xmm2
32364 ; SSE2-NEXT: pand %xmm1, %xmm2
32365 ; SSE2-NEXT: psrlw $2, %xmm0
32366 ; SSE2-NEXT: pand %xmm1, %xmm0
32367 ; SSE2-NEXT: paddb %xmm2, %xmm0
32368 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32369 ; SSE2-NEXT: psrlw $4, %xmm1
32370 ; SSE2-NEXT: paddb %xmm0, %xmm1
32371 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32372 ; SSE2-NEXT: pxor %xmm0, %xmm0
32373 ; SSE2-NEXT: psadbw %xmm1, %xmm0
32374 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
32375 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
32378 ; SSE3-LABEL: ugt_52_v2i64:
32380 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32381 ; SSE3-NEXT: psrlw $1, %xmm1
32382 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32383 ; SSE3-NEXT: psubb %xmm1, %xmm0
32384 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32385 ; SSE3-NEXT: movdqa %xmm0, %xmm2
32386 ; SSE3-NEXT: pand %xmm1, %xmm2
32387 ; SSE3-NEXT: psrlw $2, %xmm0
32388 ; SSE3-NEXT: pand %xmm1, %xmm0
32389 ; SSE3-NEXT: paddb %xmm2, %xmm0
32390 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32391 ; SSE3-NEXT: psrlw $4, %xmm1
32392 ; SSE3-NEXT: paddb %xmm0, %xmm1
32393 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32394 ; SSE3-NEXT: pxor %xmm0, %xmm0
32395 ; SSE3-NEXT: psadbw %xmm1, %xmm0
32396 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
32397 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
32400 ; SSSE3-LABEL: ugt_52_v2i64:
32402 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32403 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
32404 ; SSSE3-NEXT: pand %xmm1, %xmm2
32405 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32406 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
32407 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
32408 ; SSSE3-NEXT: psrlw $4, %xmm0
32409 ; SSSE3-NEXT: pand %xmm1, %xmm0
32410 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
32411 ; SSSE3-NEXT: paddb %xmm4, %xmm3
32412 ; SSSE3-NEXT: pxor %xmm0, %xmm0
32413 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
32414 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
32415 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
32418 ; SSE41-LABEL: ugt_52_v2i64:
32420 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32421 ; SSE41-NEXT: movdqa %xmm0, %xmm2
32422 ; SSE41-NEXT: pand %xmm1, %xmm2
32423 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32424 ; SSE41-NEXT: movdqa %xmm3, %xmm4
32425 ; SSE41-NEXT: pshufb %xmm2, %xmm4
32426 ; SSE41-NEXT: psrlw $4, %xmm0
32427 ; SSE41-NEXT: pand %xmm1, %xmm0
32428 ; SSE41-NEXT: pshufb %xmm0, %xmm3
32429 ; SSE41-NEXT: paddb %xmm4, %xmm3
32430 ; SSE41-NEXT: pxor %xmm0, %xmm0
32431 ; SSE41-NEXT: psadbw %xmm3, %xmm0
32432 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
32433 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
32436 ; AVX1-LABEL: ugt_52_v2i64:
32438 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32439 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
32440 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32441 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32442 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
32443 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
32444 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32445 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32446 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
32447 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32448 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32451 ; AVX2-LABEL: ugt_52_v2i64:
32453 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32454 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
32455 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32456 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32457 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
32458 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
32459 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32460 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32461 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
32462 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32463 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32466 ; AVX512VPOPCNTDQ-LABEL: ugt_52_v2i64:
32467 ; AVX512VPOPCNTDQ: # %bb.0:
32468 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32469 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
32470 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32471 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
32472 ; AVX512VPOPCNTDQ-NEXT: retq
32474 ; AVX512VPOPCNTDQVL-LABEL: ugt_52_v2i64:
32475 ; AVX512VPOPCNTDQVL: # %bb.0:
32476 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
32477 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52]
32478 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
32479 ; AVX512VPOPCNTDQVL-NEXT: retq
32481 ; BITALG_NOVLX-LABEL: ugt_52_v2i64:
32482 ; BITALG_NOVLX: # %bb.0:
32483 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32484 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
32485 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
32486 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32487 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32488 ; BITALG_NOVLX-NEXT: vzeroupper
32489 ; BITALG_NOVLX-NEXT: retq
32491 ; BITALG-LABEL: ugt_52_v2i64:
32493 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
32494 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
32495 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32496 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [52,52]
32497 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
32498 ; BITALG-NEXT: retq
32499 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
32500 %3 = icmp ugt <2 x i64> %2, <i64 52, i64 52>
32501 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_53_v2i64 - takes the per-lane population count of a <2 x i64> vector
; (llvm.ctpop.v2i64), compares each count unsigned-less-than against a splat of
; 53, and sign-extends the <2 x i1> compare result to <2 x i64>, so a true lane
; becomes all-ones and a false lane becomes zero.
; The assertion lines that follow are autogenerated (see the NOTE on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
32505 define <2 x i64> @ult_53_v2i64(<2 x i64> %0) {
32506 ; SSE2-LABEL: ult_53_v2i64:
32508 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32509 ; SSE2-NEXT: psrlw $1, %xmm1
32510 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32511 ; SSE2-NEXT: psubb %xmm1, %xmm0
32512 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32513 ; SSE2-NEXT: movdqa %xmm0, %xmm2
32514 ; SSE2-NEXT: pand %xmm1, %xmm2
32515 ; SSE2-NEXT: psrlw $2, %xmm0
32516 ; SSE2-NEXT: pand %xmm1, %xmm0
32517 ; SSE2-NEXT: paddb %xmm2, %xmm0
32518 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32519 ; SSE2-NEXT: psrlw $4, %xmm1
32520 ; SSE2-NEXT: paddb %xmm0, %xmm1
32521 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32522 ; SSE2-NEXT: pxor %xmm0, %xmm0
32523 ; SSE2-NEXT: psadbw %xmm1, %xmm0
32524 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32525 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [53,53,53,53]
32526 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
32529 ; SSE3-LABEL: ult_53_v2i64:
32531 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32532 ; SSE3-NEXT: psrlw $1, %xmm1
32533 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32534 ; SSE3-NEXT: psubb %xmm1, %xmm0
32535 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32536 ; SSE3-NEXT: movdqa %xmm0, %xmm2
32537 ; SSE3-NEXT: pand %xmm1, %xmm2
32538 ; SSE3-NEXT: psrlw $2, %xmm0
32539 ; SSE3-NEXT: pand %xmm1, %xmm0
32540 ; SSE3-NEXT: paddb %xmm2, %xmm0
32541 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32542 ; SSE3-NEXT: psrlw $4, %xmm1
32543 ; SSE3-NEXT: paddb %xmm0, %xmm1
32544 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32545 ; SSE3-NEXT: pxor %xmm0, %xmm0
32546 ; SSE3-NEXT: psadbw %xmm1, %xmm0
32547 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32548 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [53,53,53,53]
32549 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
32552 ; SSSE3-LABEL: ult_53_v2i64:
32554 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32555 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
32556 ; SSSE3-NEXT: pand %xmm1, %xmm2
32557 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32558 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
32559 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
32560 ; SSSE3-NEXT: psrlw $4, %xmm0
32561 ; SSSE3-NEXT: pand %xmm1, %xmm0
32562 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
32563 ; SSSE3-NEXT: paddb %xmm4, %xmm3
32564 ; SSSE3-NEXT: pxor %xmm0, %xmm0
32565 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
32566 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32567 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [53,53,53,53]
32568 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
32571 ; SSE41-LABEL: ult_53_v2i64:
32573 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32574 ; SSE41-NEXT: movdqa %xmm0, %xmm2
32575 ; SSE41-NEXT: pand %xmm1, %xmm2
32576 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32577 ; SSE41-NEXT: movdqa %xmm3, %xmm4
32578 ; SSE41-NEXT: pshufb %xmm2, %xmm4
32579 ; SSE41-NEXT: psrlw $4, %xmm0
32580 ; SSE41-NEXT: pand %xmm1, %xmm0
32581 ; SSE41-NEXT: pshufb %xmm0, %xmm3
32582 ; SSE41-NEXT: paddb %xmm4, %xmm3
32583 ; SSE41-NEXT: pxor %xmm0, %xmm0
32584 ; SSE41-NEXT: psadbw %xmm3, %xmm0
32585 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32586 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [53,53,53,53]
32587 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
32590 ; AVX1-LABEL: ult_53_v2i64:
32592 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32593 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
32594 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32595 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32596 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
32597 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
32598 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32599 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32600 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
32601 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32602 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [53,53]
32603 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32606 ; AVX2-LABEL: ult_53_v2i64:
32608 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32609 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
32610 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32611 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32612 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
32613 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
32614 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32615 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32616 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
32617 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32618 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [53,53]
32619 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32622 ; AVX512VPOPCNTDQ-LABEL: ult_53_v2i64:
32623 ; AVX512VPOPCNTDQ: # %bb.0:
32624 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32625 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
32626 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [53,53]
32627 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32628 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
32629 ; AVX512VPOPCNTDQ-NEXT: retq
32631 ; AVX512VPOPCNTDQVL-LABEL: ult_53_v2i64:
32632 ; AVX512VPOPCNTDQVL: # %bb.0:
32633 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
32634 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53]
32635 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32636 ; AVX512VPOPCNTDQVL-NEXT: retq
32638 ; BITALG_NOVLX-LABEL: ult_53_v2i64:
32639 ; BITALG_NOVLX: # %bb.0:
32640 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32641 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
32642 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
32643 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32644 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [53,53]
32645 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32646 ; BITALG_NOVLX-NEXT: vzeroupper
32647 ; BITALG_NOVLX-NEXT: retq
32649 ; BITALG-LABEL: ult_53_v2i64:
32651 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
32652 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
32653 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32654 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53]
32655 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32656 ; BITALG-NEXT: retq
32657 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
32658 %3 = icmp ult <2 x i64> %2, <i64 53, i64 53>
32659 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_53_v2i64 - takes the per-lane population count of a <2 x i64> vector
; (llvm.ctpop.v2i64), compares each count unsigned-greater-than against a splat
; of 53, and sign-extends the <2 x i1> compare result to <2 x i64>, so a true
; lane becomes all-ones and a false lane becomes zero.
; The assertion lines that follow are autogenerated (see the NOTE on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
32663 define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) {
32664 ; SSE2-LABEL: ugt_53_v2i64:
32666 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32667 ; SSE2-NEXT: psrlw $1, %xmm1
32668 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32669 ; SSE2-NEXT: psubb %xmm1, %xmm0
32670 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32671 ; SSE2-NEXT: movdqa %xmm0, %xmm2
32672 ; SSE2-NEXT: pand %xmm1, %xmm2
32673 ; SSE2-NEXT: psrlw $2, %xmm0
32674 ; SSE2-NEXT: pand %xmm1, %xmm0
32675 ; SSE2-NEXT: paddb %xmm2, %xmm0
32676 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32677 ; SSE2-NEXT: psrlw $4, %xmm1
32678 ; SSE2-NEXT: paddb %xmm0, %xmm1
32679 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32680 ; SSE2-NEXT: pxor %xmm0, %xmm0
32681 ; SSE2-NEXT: psadbw %xmm1, %xmm0
32682 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
32683 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
32686 ; SSE3-LABEL: ugt_53_v2i64:
32688 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32689 ; SSE3-NEXT: psrlw $1, %xmm1
32690 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32691 ; SSE3-NEXT: psubb %xmm1, %xmm0
32692 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32693 ; SSE3-NEXT: movdqa %xmm0, %xmm2
32694 ; SSE3-NEXT: pand %xmm1, %xmm2
32695 ; SSE3-NEXT: psrlw $2, %xmm0
32696 ; SSE3-NEXT: pand %xmm1, %xmm0
32697 ; SSE3-NEXT: paddb %xmm2, %xmm0
32698 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32699 ; SSE3-NEXT: psrlw $4, %xmm1
32700 ; SSE3-NEXT: paddb %xmm0, %xmm1
32701 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32702 ; SSE3-NEXT: pxor %xmm0, %xmm0
32703 ; SSE3-NEXT: psadbw %xmm1, %xmm0
32704 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
32705 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
32708 ; SSSE3-LABEL: ugt_53_v2i64:
32710 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32711 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
32712 ; SSSE3-NEXT: pand %xmm1, %xmm2
32713 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32714 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
32715 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
32716 ; SSSE3-NEXT: psrlw $4, %xmm0
32717 ; SSSE3-NEXT: pand %xmm1, %xmm0
32718 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
32719 ; SSSE3-NEXT: paddb %xmm4, %xmm3
32720 ; SSSE3-NEXT: pxor %xmm0, %xmm0
32721 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
32722 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
32723 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
32726 ; SSE41-LABEL: ugt_53_v2i64:
32728 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32729 ; SSE41-NEXT: movdqa %xmm0, %xmm2
32730 ; SSE41-NEXT: pand %xmm1, %xmm2
32731 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32732 ; SSE41-NEXT: movdqa %xmm3, %xmm4
32733 ; SSE41-NEXT: pshufb %xmm2, %xmm4
32734 ; SSE41-NEXT: psrlw $4, %xmm0
32735 ; SSE41-NEXT: pand %xmm1, %xmm0
32736 ; SSE41-NEXT: pshufb %xmm0, %xmm3
32737 ; SSE41-NEXT: paddb %xmm4, %xmm3
32738 ; SSE41-NEXT: pxor %xmm0, %xmm0
32739 ; SSE41-NEXT: psadbw %xmm3, %xmm0
32740 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
32741 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
32744 ; AVX1-LABEL: ugt_53_v2i64:
32746 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32747 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
32748 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32749 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32750 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
32751 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
32752 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32753 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32754 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
32755 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32756 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32759 ; AVX2-LABEL: ugt_53_v2i64:
32761 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32762 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
32763 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32764 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32765 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
32766 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
32767 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32768 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32769 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
32770 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32771 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32774 ; AVX512VPOPCNTDQ-LABEL: ugt_53_v2i64:
32775 ; AVX512VPOPCNTDQ: # %bb.0:
32776 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32777 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
32778 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32779 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
32780 ; AVX512VPOPCNTDQ-NEXT: retq
32782 ; AVX512VPOPCNTDQVL-LABEL: ugt_53_v2i64:
32783 ; AVX512VPOPCNTDQVL: # %bb.0:
32784 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
32785 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53]
32786 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
32787 ; AVX512VPOPCNTDQVL-NEXT: retq
32789 ; BITALG_NOVLX-LABEL: ugt_53_v2i64:
32790 ; BITALG_NOVLX: # %bb.0:
32791 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32792 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
32793 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
32794 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32795 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
32796 ; BITALG_NOVLX-NEXT: vzeroupper
32797 ; BITALG_NOVLX-NEXT: retq
32799 ; BITALG-LABEL: ugt_53_v2i64:
32801 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
32802 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
32803 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32804 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [53,53]
32805 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
32806 ; BITALG-NEXT: retq
32807 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
32808 %3 = icmp ugt <2 x i64> %2, <i64 53, i64 53>
32809 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_54_v2i64 - takes the per-lane population count of a <2 x i64> vector
; (llvm.ctpop.v2i64), compares each count unsigned-less-than against a splat of
; 54, and sign-extends the <2 x i1> compare result to <2 x i64>, so a true lane
; becomes all-ones and a false lane becomes zero.
; The assertion lines that follow are autogenerated (see the NOTE on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
32813 define <2 x i64> @ult_54_v2i64(<2 x i64> %0) {
32814 ; SSE2-LABEL: ult_54_v2i64:
32816 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32817 ; SSE2-NEXT: psrlw $1, %xmm1
32818 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32819 ; SSE2-NEXT: psubb %xmm1, %xmm0
32820 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32821 ; SSE2-NEXT: movdqa %xmm0, %xmm2
32822 ; SSE2-NEXT: pand %xmm1, %xmm2
32823 ; SSE2-NEXT: psrlw $2, %xmm0
32824 ; SSE2-NEXT: pand %xmm1, %xmm0
32825 ; SSE2-NEXT: paddb %xmm2, %xmm0
32826 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32827 ; SSE2-NEXT: psrlw $4, %xmm1
32828 ; SSE2-NEXT: paddb %xmm0, %xmm1
32829 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32830 ; SSE2-NEXT: pxor %xmm0, %xmm0
32831 ; SSE2-NEXT: psadbw %xmm1, %xmm0
32832 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32833 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [54,54,54,54]
32834 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
32837 ; SSE3-LABEL: ult_54_v2i64:
32839 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32840 ; SSE3-NEXT: psrlw $1, %xmm1
32841 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32842 ; SSE3-NEXT: psubb %xmm1, %xmm0
32843 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32844 ; SSE3-NEXT: movdqa %xmm0, %xmm2
32845 ; SSE3-NEXT: pand %xmm1, %xmm2
32846 ; SSE3-NEXT: psrlw $2, %xmm0
32847 ; SSE3-NEXT: pand %xmm1, %xmm0
32848 ; SSE3-NEXT: paddb %xmm2, %xmm0
32849 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32850 ; SSE3-NEXT: psrlw $4, %xmm1
32851 ; SSE3-NEXT: paddb %xmm0, %xmm1
32852 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32853 ; SSE3-NEXT: pxor %xmm0, %xmm0
32854 ; SSE3-NEXT: psadbw %xmm1, %xmm0
32855 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32856 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [54,54,54,54]
32857 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
32860 ; SSSE3-LABEL: ult_54_v2i64:
32862 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32863 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
32864 ; SSSE3-NEXT: pand %xmm1, %xmm2
32865 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32866 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
32867 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
32868 ; SSSE3-NEXT: psrlw $4, %xmm0
32869 ; SSSE3-NEXT: pand %xmm1, %xmm0
32870 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
32871 ; SSSE3-NEXT: paddb %xmm4, %xmm3
32872 ; SSSE3-NEXT: pxor %xmm0, %xmm0
32873 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
32874 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32875 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [54,54,54,54]
32876 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
32879 ; SSE41-LABEL: ult_54_v2i64:
32881 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32882 ; SSE41-NEXT: movdqa %xmm0, %xmm2
32883 ; SSE41-NEXT: pand %xmm1, %xmm2
32884 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32885 ; SSE41-NEXT: movdqa %xmm3, %xmm4
32886 ; SSE41-NEXT: pshufb %xmm2, %xmm4
32887 ; SSE41-NEXT: psrlw $4, %xmm0
32888 ; SSE41-NEXT: pand %xmm1, %xmm0
32889 ; SSE41-NEXT: pshufb %xmm0, %xmm3
32890 ; SSE41-NEXT: paddb %xmm4, %xmm3
32891 ; SSE41-NEXT: pxor %xmm0, %xmm0
32892 ; SSE41-NEXT: psadbw %xmm3, %xmm0
32893 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
32894 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [54,54,54,54]
32895 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
32898 ; AVX1-LABEL: ult_54_v2i64:
32900 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32901 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
32902 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32903 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32904 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
32905 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
32906 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32907 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32908 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
32909 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32910 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [54,54]
32911 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32914 ; AVX2-LABEL: ult_54_v2i64:
32916 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
32917 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
32918 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
32919 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
32920 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
32921 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
32922 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
32923 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
32924 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
32925 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32926 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [54,54]
32927 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32930 ; AVX512VPOPCNTDQ-LABEL: ult_54_v2i64:
32931 ; AVX512VPOPCNTDQ: # %bb.0:
32932 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32933 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
32934 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [54,54]
32935 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32936 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
32937 ; AVX512VPOPCNTDQ-NEXT: retq
32939 ; AVX512VPOPCNTDQVL-LABEL: ult_54_v2i64:
32940 ; AVX512VPOPCNTDQVL: # %bb.0:
32941 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
32942 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54]
32943 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32944 ; AVX512VPOPCNTDQVL-NEXT: retq
32946 ; BITALG_NOVLX-LABEL: ult_54_v2i64:
32947 ; BITALG_NOVLX: # %bb.0:
32948 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
32949 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
32950 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
32951 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32952 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [54,54]
32953 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32954 ; BITALG_NOVLX-NEXT: vzeroupper
32955 ; BITALG_NOVLX-NEXT: retq
32957 ; BITALG-LABEL: ult_54_v2i64:
32959 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
32960 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
32961 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
32962 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54]
32963 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
32964 ; BITALG-NEXT: retq
32965 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
32966 %3 = icmp ult <2 x i64> %2, <i64 54, i64 54>
32967 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ugt_54_v2i64 - takes the per-lane population count of a <2 x i64> vector
; (llvm.ctpop.v2i64), compares each count unsigned-greater-than against a splat
; of 54, and sign-extends the <2 x i1> compare result to <2 x i64>, so a true
; lane becomes all-ones and a false lane becomes zero.
; The assertion lines that follow are autogenerated (see the NOTE on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
32971 define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) {
32972 ; SSE2-LABEL: ugt_54_v2i64:
32974 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32975 ; SSE2-NEXT: psrlw $1, %xmm1
32976 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32977 ; SSE2-NEXT: psubb %xmm1, %xmm0
32978 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
32979 ; SSE2-NEXT: movdqa %xmm0, %xmm2
32980 ; SSE2-NEXT: pand %xmm1, %xmm2
32981 ; SSE2-NEXT: psrlw $2, %xmm0
32982 ; SSE2-NEXT: pand %xmm1, %xmm0
32983 ; SSE2-NEXT: paddb %xmm2, %xmm0
32984 ; SSE2-NEXT: movdqa %xmm0, %xmm1
32985 ; SSE2-NEXT: psrlw $4, %xmm1
32986 ; SSE2-NEXT: paddb %xmm0, %xmm1
32987 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32988 ; SSE2-NEXT: pxor %xmm0, %xmm0
32989 ; SSE2-NEXT: psadbw %xmm1, %xmm0
32990 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
32991 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
32994 ; SSE3-LABEL: ugt_54_v2i64:
32996 ; SSE3-NEXT: movdqa %xmm0, %xmm1
32997 ; SSE3-NEXT: psrlw $1, %xmm1
32998 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
32999 ; SSE3-NEXT: psubb %xmm1, %xmm0
33000 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33001 ; SSE3-NEXT: movdqa %xmm0, %xmm2
33002 ; SSE3-NEXT: pand %xmm1, %xmm2
33003 ; SSE3-NEXT: psrlw $2, %xmm0
33004 ; SSE3-NEXT: pand %xmm1, %xmm0
33005 ; SSE3-NEXT: paddb %xmm2, %xmm0
33006 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33007 ; SSE3-NEXT: psrlw $4, %xmm1
33008 ; SSE3-NEXT: paddb %xmm0, %xmm1
33009 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33010 ; SSE3-NEXT: pxor %xmm0, %xmm0
33011 ; SSE3-NEXT: psadbw %xmm1, %xmm0
33012 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
33013 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33016 ; SSSE3-LABEL: ugt_54_v2i64:
33018 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33019 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
33020 ; SSSE3-NEXT: pand %xmm1, %xmm2
33021 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33022 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
33023 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
33024 ; SSSE3-NEXT: psrlw $4, %xmm0
33025 ; SSSE3-NEXT: pand %xmm1, %xmm0
33026 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
33027 ; SSSE3-NEXT: paddb %xmm4, %xmm3
33028 ; SSSE3-NEXT: pxor %xmm0, %xmm0
33029 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
33030 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
33031 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33034 ; SSE41-LABEL: ugt_54_v2i64:
33036 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33037 ; SSE41-NEXT: movdqa %xmm0, %xmm2
33038 ; SSE41-NEXT: pand %xmm1, %xmm2
33039 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33040 ; SSE41-NEXT: movdqa %xmm3, %xmm4
33041 ; SSE41-NEXT: pshufb %xmm2, %xmm4
33042 ; SSE41-NEXT: psrlw $4, %xmm0
33043 ; SSE41-NEXT: pand %xmm1, %xmm0
33044 ; SSE41-NEXT: pshufb %xmm0, %xmm3
33045 ; SSE41-NEXT: paddb %xmm4, %xmm3
33046 ; SSE41-NEXT: pxor %xmm0, %xmm0
33047 ; SSE41-NEXT: psadbw %xmm3, %xmm0
33048 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
33049 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33052 ; AVX1-LABEL: ugt_54_v2i64:
33054 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33055 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
33056 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33057 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33058 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
33059 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33060 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33061 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33062 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
33063 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33064 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33067 ; AVX2-LABEL: ugt_54_v2i64:
33069 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33070 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
33071 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33072 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33073 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
33074 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
33075 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33076 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33077 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
33078 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33079 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33082 ; AVX512VPOPCNTDQ-LABEL: ugt_54_v2i64:
33083 ; AVX512VPOPCNTDQ: # %bb.0:
33084 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33085 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
33086 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33087 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
33088 ; AVX512VPOPCNTDQ-NEXT: retq
33090 ; AVX512VPOPCNTDQVL-LABEL: ugt_54_v2i64:
33091 ; AVX512VPOPCNTDQVL: # %bb.0:
33092 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
33093 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54]
33094 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
33095 ; AVX512VPOPCNTDQVL-NEXT: retq
33097 ; BITALG_NOVLX-LABEL: ugt_54_v2i64:
33098 ; BITALG_NOVLX: # %bb.0:
33099 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33100 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
33101 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
33102 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33103 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33104 ; BITALG_NOVLX-NEXT: vzeroupper
33105 ; BITALG_NOVLX-NEXT: retq
33107 ; BITALG-LABEL: ugt_54_v2i64:
33109 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
33110 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
33111 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33112 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [54,54]
33113 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
33114 ; BITALG-NEXT: retq
33115 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
33116 %3 = icmp ugt <2 x i64> %2, <i64 54, i64 54>
33117 %4 = sext <2 x i1> %3 to <2 x i64>
; Test ult_55_v2i64 - takes the per-lane population count of a <2 x i64> vector
; (llvm.ctpop.v2i64), compares each count unsigned-less-than against a splat of
; 55, and sign-extends the <2 x i1> compare result to <2 x i64>, so a true lane
; becomes all-ones and a false lane becomes zero.
; The assertion lines that follow are autogenerated (see the NOTE on the first
; line of this file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
33121 define <2 x i64> @ult_55_v2i64(<2 x i64> %0) {
33122 ; SSE2-LABEL: ult_55_v2i64:
33124 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33125 ; SSE2-NEXT: psrlw $1, %xmm1
33126 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33127 ; SSE2-NEXT: psubb %xmm1, %xmm0
33128 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33129 ; SSE2-NEXT: movdqa %xmm0, %xmm2
33130 ; SSE2-NEXT: pand %xmm1, %xmm2
33131 ; SSE2-NEXT: psrlw $2, %xmm0
33132 ; SSE2-NEXT: pand %xmm1, %xmm0
33133 ; SSE2-NEXT: paddb %xmm2, %xmm0
33134 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33135 ; SSE2-NEXT: psrlw $4, %xmm1
33136 ; SSE2-NEXT: paddb %xmm0, %xmm1
33137 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33138 ; SSE2-NEXT: pxor %xmm0, %xmm0
33139 ; SSE2-NEXT: psadbw %xmm1, %xmm0
33140 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33141 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [55,55,55,55]
33142 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
33145 ; SSE3-LABEL: ult_55_v2i64:
33147 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33148 ; SSE3-NEXT: psrlw $1, %xmm1
33149 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33150 ; SSE3-NEXT: psubb %xmm1, %xmm0
33151 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33152 ; SSE3-NEXT: movdqa %xmm0, %xmm2
33153 ; SSE3-NEXT: pand %xmm1, %xmm2
33154 ; SSE3-NEXT: psrlw $2, %xmm0
33155 ; SSE3-NEXT: pand %xmm1, %xmm0
33156 ; SSE3-NEXT: paddb %xmm2, %xmm0
33157 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33158 ; SSE3-NEXT: psrlw $4, %xmm1
33159 ; SSE3-NEXT: paddb %xmm0, %xmm1
33160 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33161 ; SSE3-NEXT: pxor %xmm0, %xmm0
33162 ; SSE3-NEXT: psadbw %xmm1, %xmm0
33163 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33164 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [55,55,55,55]
33165 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
33168 ; SSSE3-LABEL: ult_55_v2i64:
33170 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33171 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
33172 ; SSSE3-NEXT: pand %xmm1, %xmm2
33173 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33174 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
33175 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
33176 ; SSSE3-NEXT: psrlw $4, %xmm0
33177 ; SSSE3-NEXT: pand %xmm1, %xmm0
33178 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
33179 ; SSSE3-NEXT: paddb %xmm4, %xmm3
33180 ; SSSE3-NEXT: pxor %xmm0, %xmm0
33181 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
33182 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33183 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [55,55,55,55]
33184 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
33187 ; SSE41-LABEL: ult_55_v2i64:
33189 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33190 ; SSE41-NEXT: movdqa %xmm0, %xmm2
33191 ; SSE41-NEXT: pand %xmm1, %xmm2
33192 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33193 ; SSE41-NEXT: movdqa %xmm3, %xmm4
33194 ; SSE41-NEXT: pshufb %xmm2, %xmm4
33195 ; SSE41-NEXT: psrlw $4, %xmm0
33196 ; SSE41-NEXT: pand %xmm1, %xmm0
33197 ; SSE41-NEXT: pshufb %xmm0, %xmm3
33198 ; SSE41-NEXT: paddb %xmm4, %xmm3
33199 ; SSE41-NEXT: pxor %xmm0, %xmm0
33200 ; SSE41-NEXT: psadbw %xmm3, %xmm0
33201 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33202 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [55,55,55,55]
33203 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
33206 ; AVX1-LABEL: ult_55_v2i64:
33208 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33209 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
33210 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33211 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33212 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
33213 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33214 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33215 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33216 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
33217 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33218 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [55,55]
33219 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33222 ; AVX2-LABEL: ult_55_v2i64:
33224 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33225 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
33226 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33227 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33228 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
33229 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
33230 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33231 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33232 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
33233 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33234 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [55,55]
33235 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33238 ; AVX512VPOPCNTDQ-LABEL: ult_55_v2i64:
33239 ; AVX512VPOPCNTDQ: # %bb.0:
33240 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33241 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
33242 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [55,55]
33243 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33244 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
33245 ; AVX512VPOPCNTDQ-NEXT: retq
33247 ; AVX512VPOPCNTDQVL-LABEL: ult_55_v2i64:
33248 ; AVX512VPOPCNTDQVL: # %bb.0:
33249 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
33250 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55]
33251 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33252 ; AVX512VPOPCNTDQVL-NEXT: retq
33254 ; BITALG_NOVLX-LABEL: ult_55_v2i64:
33255 ; BITALG_NOVLX: # %bb.0:
33256 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33257 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
33258 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
33259 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33260 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [55,55]
33261 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33262 ; BITALG_NOVLX-NEXT: vzeroupper
33263 ; BITALG_NOVLX-NEXT: retq
33265 ; BITALG-LABEL: ult_55_v2i64:
33267 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
33268 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
33269 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33270 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55]
33271 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33272 ; BITALG-NEXT: retq
33273 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
33274 %3 = icmp ult <2 x i64> %2, <i64 55, i64 55>
33275 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_55_v2i64: takes the per-lane population count of a <2 x i64> input,
; compares each count unsigned-greater-than 55, and sign-extends the <2 x i1>
; result to <2 x i64> (presumably the returned value; the ret line is not
; shown in this excerpt).
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
33279 define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) {
33280 ; SSE2-LABEL: ugt_55_v2i64:
33282 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33283 ; SSE2-NEXT: psrlw $1, %xmm1
33284 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33285 ; SSE2-NEXT: psubb %xmm1, %xmm0
33286 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33287 ; SSE2-NEXT: movdqa %xmm0, %xmm2
33288 ; SSE2-NEXT: pand %xmm1, %xmm2
33289 ; SSE2-NEXT: psrlw $2, %xmm0
33290 ; SSE2-NEXT: pand %xmm1, %xmm0
33291 ; SSE2-NEXT: paddb %xmm2, %xmm0
33292 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33293 ; SSE2-NEXT: psrlw $4, %xmm1
33294 ; SSE2-NEXT: paddb %xmm0, %xmm1
33295 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33296 ; SSE2-NEXT: pxor %xmm0, %xmm0
33297 ; SSE2-NEXT: psadbw %xmm1, %xmm0
33298 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
33299 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33302 ; SSE3-LABEL: ugt_55_v2i64:
33304 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33305 ; SSE3-NEXT: psrlw $1, %xmm1
33306 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33307 ; SSE3-NEXT: psubb %xmm1, %xmm0
33308 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33309 ; SSE3-NEXT: movdqa %xmm0, %xmm2
33310 ; SSE3-NEXT: pand %xmm1, %xmm2
33311 ; SSE3-NEXT: psrlw $2, %xmm0
33312 ; SSE3-NEXT: pand %xmm1, %xmm0
33313 ; SSE3-NEXT: paddb %xmm2, %xmm0
33314 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33315 ; SSE3-NEXT: psrlw $4, %xmm1
33316 ; SSE3-NEXT: paddb %xmm0, %xmm1
33317 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33318 ; SSE3-NEXT: pxor %xmm0, %xmm0
33319 ; SSE3-NEXT: psadbw %xmm1, %xmm0
33320 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
33321 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33324 ; SSSE3-LABEL: ugt_55_v2i64:
33326 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33327 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
33328 ; SSSE3-NEXT: pand %xmm1, %xmm2
33329 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33330 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
33331 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
33332 ; SSSE3-NEXT: psrlw $4, %xmm0
33333 ; SSSE3-NEXT: pand %xmm1, %xmm0
33334 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
33335 ; SSSE3-NEXT: paddb %xmm4, %xmm3
33336 ; SSSE3-NEXT: pxor %xmm0, %xmm0
33337 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
33338 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
33339 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33342 ; SSE41-LABEL: ugt_55_v2i64:
33344 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33345 ; SSE41-NEXT: movdqa %xmm0, %xmm2
33346 ; SSE41-NEXT: pand %xmm1, %xmm2
33347 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33348 ; SSE41-NEXT: movdqa %xmm3, %xmm4
33349 ; SSE41-NEXT: pshufb %xmm2, %xmm4
33350 ; SSE41-NEXT: psrlw $4, %xmm0
33351 ; SSE41-NEXT: pand %xmm1, %xmm0
33352 ; SSE41-NEXT: pshufb %xmm0, %xmm3
33353 ; SSE41-NEXT: paddb %xmm4, %xmm3
33354 ; SSE41-NEXT: pxor %xmm0, %xmm0
33355 ; SSE41-NEXT: psadbw %xmm3, %xmm0
33356 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
33357 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33360 ; AVX1-LABEL: ugt_55_v2i64:
33362 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33363 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
33364 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33365 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33366 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
33367 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33368 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33369 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33370 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
33371 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33372 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33375 ; AVX2-LABEL: ugt_55_v2i64:
33377 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33378 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
33379 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33380 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33381 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
33382 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
33383 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33384 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33385 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
33386 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33387 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33390 ; AVX512VPOPCNTDQ-LABEL: ugt_55_v2i64:
33391 ; AVX512VPOPCNTDQ: # %bb.0:
33392 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33393 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
33394 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33395 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
33396 ; AVX512VPOPCNTDQ-NEXT: retq
33398 ; AVX512VPOPCNTDQVL-LABEL: ugt_55_v2i64:
33399 ; AVX512VPOPCNTDQVL: # %bb.0:
33400 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
33401 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55]
33402 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
33403 ; AVX512VPOPCNTDQVL-NEXT: retq
33405 ; BITALG_NOVLX-LABEL: ugt_55_v2i64:
33406 ; BITALG_NOVLX: # %bb.0:
33407 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33408 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
33409 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
33410 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33411 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33412 ; BITALG_NOVLX-NEXT: vzeroupper
33413 ; BITALG_NOVLX-NEXT: retq
33415 ; BITALG-LABEL: ugt_55_v2i64:
33417 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
33418 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
33419 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33420 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [55,55]
33421 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
33422 ; BITALG-NEXT: retq
; IR under test: ctpop on both lanes, icmp ugt against splat 55, then sext of the mask.
33423 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
33424 %3 = icmp ugt <2 x i64> %2, <i64 55, i64 55>
33425 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_56_v2i64: takes the per-lane population count of a <2 x i64> input,
; compares each count unsigned-less-than 56, and sign-extends the <2 x i1>
; result to <2 x i64> (presumably the returned value; the ret line is not
; shown in this excerpt).
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
33429 define <2 x i64> @ult_56_v2i64(<2 x i64> %0) {
33430 ; SSE2-LABEL: ult_56_v2i64:
33432 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33433 ; SSE2-NEXT: psrlw $1, %xmm1
33434 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33435 ; SSE2-NEXT: psubb %xmm1, %xmm0
33436 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33437 ; SSE2-NEXT: movdqa %xmm0, %xmm2
33438 ; SSE2-NEXT: pand %xmm1, %xmm2
33439 ; SSE2-NEXT: psrlw $2, %xmm0
33440 ; SSE2-NEXT: pand %xmm1, %xmm0
33441 ; SSE2-NEXT: paddb %xmm2, %xmm0
33442 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33443 ; SSE2-NEXT: psrlw $4, %xmm1
33444 ; SSE2-NEXT: paddb %xmm0, %xmm1
33445 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33446 ; SSE2-NEXT: pxor %xmm0, %xmm0
33447 ; SSE2-NEXT: psadbw %xmm1, %xmm0
33448 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33449 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [56,56,56,56]
33450 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
33453 ; SSE3-LABEL: ult_56_v2i64:
33455 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33456 ; SSE3-NEXT: psrlw $1, %xmm1
33457 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33458 ; SSE3-NEXT: psubb %xmm1, %xmm0
33459 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33460 ; SSE3-NEXT: movdqa %xmm0, %xmm2
33461 ; SSE3-NEXT: pand %xmm1, %xmm2
33462 ; SSE3-NEXT: psrlw $2, %xmm0
33463 ; SSE3-NEXT: pand %xmm1, %xmm0
33464 ; SSE3-NEXT: paddb %xmm2, %xmm0
33465 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33466 ; SSE3-NEXT: psrlw $4, %xmm1
33467 ; SSE3-NEXT: paddb %xmm0, %xmm1
33468 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33469 ; SSE3-NEXT: pxor %xmm0, %xmm0
33470 ; SSE3-NEXT: psadbw %xmm1, %xmm0
33471 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33472 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [56,56,56,56]
33473 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
33476 ; SSSE3-LABEL: ult_56_v2i64:
33478 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33479 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
33480 ; SSSE3-NEXT: pand %xmm1, %xmm2
33481 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33482 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
33483 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
33484 ; SSSE3-NEXT: psrlw $4, %xmm0
33485 ; SSSE3-NEXT: pand %xmm1, %xmm0
33486 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
33487 ; SSSE3-NEXT: paddb %xmm4, %xmm3
33488 ; SSSE3-NEXT: pxor %xmm0, %xmm0
33489 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
33490 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33491 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [56,56,56,56]
33492 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
33495 ; SSE41-LABEL: ult_56_v2i64:
33497 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33498 ; SSE41-NEXT: movdqa %xmm0, %xmm2
33499 ; SSE41-NEXT: pand %xmm1, %xmm2
33500 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33501 ; SSE41-NEXT: movdqa %xmm3, %xmm4
33502 ; SSE41-NEXT: pshufb %xmm2, %xmm4
33503 ; SSE41-NEXT: psrlw $4, %xmm0
33504 ; SSE41-NEXT: pand %xmm1, %xmm0
33505 ; SSE41-NEXT: pshufb %xmm0, %xmm3
33506 ; SSE41-NEXT: paddb %xmm4, %xmm3
33507 ; SSE41-NEXT: pxor %xmm0, %xmm0
33508 ; SSE41-NEXT: psadbw %xmm3, %xmm0
33509 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33510 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [56,56,56,56]
33511 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
33514 ; AVX1-LABEL: ult_56_v2i64:
33516 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33517 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
33518 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33519 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33520 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
33521 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33522 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33523 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33524 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
33525 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33526 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [56,56]
33527 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33530 ; AVX2-LABEL: ult_56_v2i64:
33532 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33533 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
33534 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33535 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33536 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
33537 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
33538 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33539 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33540 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
33541 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33542 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [56,56]
33543 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33546 ; AVX512VPOPCNTDQ-LABEL: ult_56_v2i64:
33547 ; AVX512VPOPCNTDQ: # %bb.0:
33548 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33549 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
33550 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [56,56]
33551 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33552 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
33553 ; AVX512VPOPCNTDQ-NEXT: retq
33555 ; AVX512VPOPCNTDQVL-LABEL: ult_56_v2i64:
33556 ; AVX512VPOPCNTDQVL: # %bb.0:
33557 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
33558 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56]
33559 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33560 ; AVX512VPOPCNTDQVL-NEXT: retq
33562 ; BITALG_NOVLX-LABEL: ult_56_v2i64:
33563 ; BITALG_NOVLX: # %bb.0:
33564 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33565 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
33566 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
33567 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33568 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [56,56]
33569 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33570 ; BITALG_NOVLX-NEXT: vzeroupper
33571 ; BITALG_NOVLX-NEXT: retq
33573 ; BITALG-LABEL: ult_56_v2i64:
33575 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
33576 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
33577 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33578 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56]
33579 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33580 ; BITALG-NEXT: retq
; IR under test: ctpop on both lanes, icmp ult against splat 56, then sext of the mask.
33581 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
33582 %3 = icmp ult <2 x i64> %2, <i64 56, i64 56>
33583 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_56_v2i64: takes the per-lane population count of a <2 x i64> input,
; compares each count unsigned-greater-than 56, and sign-extends the <2 x i1>
; result to <2 x i64> (presumably the returned value; the ret line is not
; shown in this excerpt).
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
33587 define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) {
33588 ; SSE2-LABEL: ugt_56_v2i64:
33590 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33591 ; SSE2-NEXT: psrlw $1, %xmm1
33592 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33593 ; SSE2-NEXT: psubb %xmm1, %xmm0
33594 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33595 ; SSE2-NEXT: movdqa %xmm0, %xmm2
33596 ; SSE2-NEXT: pand %xmm1, %xmm2
33597 ; SSE2-NEXT: psrlw $2, %xmm0
33598 ; SSE2-NEXT: pand %xmm1, %xmm0
33599 ; SSE2-NEXT: paddb %xmm2, %xmm0
33600 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33601 ; SSE2-NEXT: psrlw $4, %xmm1
33602 ; SSE2-NEXT: paddb %xmm0, %xmm1
33603 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33604 ; SSE2-NEXT: pxor %xmm0, %xmm0
33605 ; SSE2-NEXT: psadbw %xmm1, %xmm0
33606 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
33607 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33610 ; SSE3-LABEL: ugt_56_v2i64:
33612 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33613 ; SSE3-NEXT: psrlw $1, %xmm1
33614 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33615 ; SSE3-NEXT: psubb %xmm1, %xmm0
33616 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33617 ; SSE3-NEXT: movdqa %xmm0, %xmm2
33618 ; SSE3-NEXT: pand %xmm1, %xmm2
33619 ; SSE3-NEXT: psrlw $2, %xmm0
33620 ; SSE3-NEXT: pand %xmm1, %xmm0
33621 ; SSE3-NEXT: paddb %xmm2, %xmm0
33622 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33623 ; SSE3-NEXT: psrlw $4, %xmm1
33624 ; SSE3-NEXT: paddb %xmm0, %xmm1
33625 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33626 ; SSE3-NEXT: pxor %xmm0, %xmm0
33627 ; SSE3-NEXT: psadbw %xmm1, %xmm0
33628 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
33629 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33632 ; SSSE3-LABEL: ugt_56_v2i64:
33634 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33635 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
33636 ; SSSE3-NEXT: pand %xmm1, %xmm2
33637 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33638 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
33639 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
33640 ; SSSE3-NEXT: psrlw $4, %xmm0
33641 ; SSSE3-NEXT: pand %xmm1, %xmm0
33642 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
33643 ; SSSE3-NEXT: paddb %xmm4, %xmm3
33644 ; SSSE3-NEXT: pxor %xmm0, %xmm0
33645 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
33646 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
33647 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33650 ; SSE41-LABEL: ugt_56_v2i64:
33652 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33653 ; SSE41-NEXT: movdqa %xmm0, %xmm2
33654 ; SSE41-NEXT: pand %xmm1, %xmm2
33655 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33656 ; SSE41-NEXT: movdqa %xmm3, %xmm4
33657 ; SSE41-NEXT: pshufb %xmm2, %xmm4
33658 ; SSE41-NEXT: psrlw $4, %xmm0
33659 ; SSE41-NEXT: pand %xmm1, %xmm0
33660 ; SSE41-NEXT: pshufb %xmm0, %xmm3
33661 ; SSE41-NEXT: paddb %xmm4, %xmm3
33662 ; SSE41-NEXT: pxor %xmm0, %xmm0
33663 ; SSE41-NEXT: psadbw %xmm3, %xmm0
33664 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
33665 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33668 ; AVX1-LABEL: ugt_56_v2i64:
33670 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33671 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
33672 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33673 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33674 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
33675 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33676 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33677 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33678 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
33679 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33680 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33683 ; AVX2-LABEL: ugt_56_v2i64:
33685 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33686 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
33687 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33688 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33689 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
33690 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
33691 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33692 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33693 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
33694 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33695 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33698 ; AVX512VPOPCNTDQ-LABEL: ugt_56_v2i64:
33699 ; AVX512VPOPCNTDQ: # %bb.0:
33700 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33701 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
33702 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33703 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
33704 ; AVX512VPOPCNTDQ-NEXT: retq
33706 ; AVX512VPOPCNTDQVL-LABEL: ugt_56_v2i64:
33707 ; AVX512VPOPCNTDQVL: # %bb.0:
33708 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
33709 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56]
33710 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
33711 ; AVX512VPOPCNTDQVL-NEXT: retq
33713 ; BITALG_NOVLX-LABEL: ugt_56_v2i64:
33714 ; BITALG_NOVLX: # %bb.0:
33715 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33716 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
33717 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
33718 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33719 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33720 ; BITALG_NOVLX-NEXT: vzeroupper
33721 ; BITALG_NOVLX-NEXT: retq
33723 ; BITALG-LABEL: ugt_56_v2i64:
33725 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
33726 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
33727 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33728 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [56,56]
33729 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
33730 ; BITALG-NEXT: retq
; IR under test: ctpop on both lanes, icmp ugt against splat 56, then sext of the mask.
33731 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
33732 %3 = icmp ugt <2 x i64> %2, <i64 56, i64 56>
33733 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_57_v2i64: takes the per-lane population count of a <2 x i64> input,
; compares each count unsigned-less-than 57, and sign-extends the <2 x i1>
; result to <2 x i64> (presumably the returned value; the ret line is not
; shown in this excerpt).
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
33737 define <2 x i64> @ult_57_v2i64(<2 x i64> %0) {
33738 ; SSE2-LABEL: ult_57_v2i64:
33740 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33741 ; SSE2-NEXT: psrlw $1, %xmm1
33742 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33743 ; SSE2-NEXT: psubb %xmm1, %xmm0
33744 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33745 ; SSE2-NEXT: movdqa %xmm0, %xmm2
33746 ; SSE2-NEXT: pand %xmm1, %xmm2
33747 ; SSE2-NEXT: psrlw $2, %xmm0
33748 ; SSE2-NEXT: pand %xmm1, %xmm0
33749 ; SSE2-NEXT: paddb %xmm2, %xmm0
33750 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33751 ; SSE2-NEXT: psrlw $4, %xmm1
33752 ; SSE2-NEXT: paddb %xmm0, %xmm1
33753 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33754 ; SSE2-NEXT: pxor %xmm0, %xmm0
33755 ; SSE2-NEXT: psadbw %xmm1, %xmm0
33756 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33757 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [57,57,57,57]
33758 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
33761 ; SSE3-LABEL: ult_57_v2i64:
33763 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33764 ; SSE3-NEXT: psrlw $1, %xmm1
33765 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33766 ; SSE3-NEXT: psubb %xmm1, %xmm0
33767 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33768 ; SSE3-NEXT: movdqa %xmm0, %xmm2
33769 ; SSE3-NEXT: pand %xmm1, %xmm2
33770 ; SSE3-NEXT: psrlw $2, %xmm0
33771 ; SSE3-NEXT: pand %xmm1, %xmm0
33772 ; SSE3-NEXT: paddb %xmm2, %xmm0
33773 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33774 ; SSE3-NEXT: psrlw $4, %xmm1
33775 ; SSE3-NEXT: paddb %xmm0, %xmm1
33776 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33777 ; SSE3-NEXT: pxor %xmm0, %xmm0
33778 ; SSE3-NEXT: psadbw %xmm1, %xmm0
33779 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33780 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [57,57,57,57]
33781 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
33784 ; SSSE3-LABEL: ult_57_v2i64:
33786 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33787 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
33788 ; SSSE3-NEXT: pand %xmm1, %xmm2
33789 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33790 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
33791 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
33792 ; SSSE3-NEXT: psrlw $4, %xmm0
33793 ; SSSE3-NEXT: pand %xmm1, %xmm0
33794 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
33795 ; SSSE3-NEXT: paddb %xmm4, %xmm3
33796 ; SSSE3-NEXT: pxor %xmm0, %xmm0
33797 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
33798 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33799 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [57,57,57,57]
33800 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
33803 ; SSE41-LABEL: ult_57_v2i64:
33805 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33806 ; SSE41-NEXT: movdqa %xmm0, %xmm2
33807 ; SSE41-NEXT: pand %xmm1, %xmm2
33808 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33809 ; SSE41-NEXT: movdqa %xmm3, %xmm4
33810 ; SSE41-NEXT: pshufb %xmm2, %xmm4
33811 ; SSE41-NEXT: psrlw $4, %xmm0
33812 ; SSE41-NEXT: pand %xmm1, %xmm0
33813 ; SSE41-NEXT: pshufb %xmm0, %xmm3
33814 ; SSE41-NEXT: paddb %xmm4, %xmm3
33815 ; SSE41-NEXT: pxor %xmm0, %xmm0
33816 ; SSE41-NEXT: psadbw %xmm3, %xmm0
33817 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
33818 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [57,57,57,57]
33819 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
33822 ; AVX1-LABEL: ult_57_v2i64:
33824 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33825 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
33826 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33827 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33828 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
33829 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33830 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33831 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33832 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
33833 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33834 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [57,57]
33835 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33838 ; AVX2-LABEL: ult_57_v2i64:
33840 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33841 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
33842 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33843 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33844 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
33845 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
33846 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33847 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33848 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
33849 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33850 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [57,57]
33851 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33854 ; AVX512VPOPCNTDQ-LABEL: ult_57_v2i64:
33855 ; AVX512VPOPCNTDQ: # %bb.0:
33856 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33857 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
33858 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [57,57]
33859 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33860 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
33861 ; AVX512VPOPCNTDQ-NEXT: retq
33863 ; AVX512VPOPCNTDQVL-LABEL: ult_57_v2i64:
33864 ; AVX512VPOPCNTDQVL: # %bb.0:
33865 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
33866 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57]
33867 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33868 ; AVX512VPOPCNTDQVL-NEXT: retq
33870 ; BITALG_NOVLX-LABEL: ult_57_v2i64:
33871 ; BITALG_NOVLX: # %bb.0:
33872 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
33873 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
33874 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
33875 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33876 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [57,57]
33877 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33878 ; BITALG_NOVLX-NEXT: vzeroupper
33879 ; BITALG_NOVLX-NEXT: retq
33881 ; BITALG-LABEL: ult_57_v2i64:
33883 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
33884 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
33885 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33886 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57]
33887 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
33888 ; BITALG-NEXT: retq
; IR under test: ctpop on both lanes, icmp ult against splat 57, then sext of the mask.
33889 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
33890 %3 = icmp ult <2 x i64> %2, <i64 57, i64 57>
33891 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_57_v2i64: takes the per-lane population count of a <2 x i64> input,
; compares each count unsigned-greater-than 57, and sign-extends the <2 x i1>
; result to <2 x i64> (presumably the returned value; the ret line is not
; shown in this excerpt).
; The assertion lines below are autogenerated (see the note at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
33895 define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) {
33896 ; SSE2-LABEL: ugt_57_v2i64:
33898 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33899 ; SSE2-NEXT: psrlw $1, %xmm1
33900 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33901 ; SSE2-NEXT: psubb %xmm1, %xmm0
33902 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33903 ; SSE2-NEXT: movdqa %xmm0, %xmm2
33904 ; SSE2-NEXT: pand %xmm1, %xmm2
33905 ; SSE2-NEXT: psrlw $2, %xmm0
33906 ; SSE2-NEXT: pand %xmm1, %xmm0
33907 ; SSE2-NEXT: paddb %xmm2, %xmm0
33908 ; SSE2-NEXT: movdqa %xmm0, %xmm1
33909 ; SSE2-NEXT: psrlw $4, %xmm1
33910 ; SSE2-NEXT: paddb %xmm0, %xmm1
33911 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33912 ; SSE2-NEXT: pxor %xmm0, %xmm0
33913 ; SSE2-NEXT: psadbw %xmm1, %xmm0
33914 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
33915 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33918 ; SSE3-LABEL: ugt_57_v2i64:
33920 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33921 ; SSE3-NEXT: psrlw $1, %xmm1
33922 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33923 ; SSE3-NEXT: psubb %xmm1, %xmm0
33924 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
33925 ; SSE3-NEXT: movdqa %xmm0, %xmm2
33926 ; SSE3-NEXT: pand %xmm1, %xmm2
33927 ; SSE3-NEXT: psrlw $2, %xmm0
33928 ; SSE3-NEXT: pand %xmm1, %xmm0
33929 ; SSE3-NEXT: paddb %xmm2, %xmm0
33930 ; SSE3-NEXT: movdqa %xmm0, %xmm1
33931 ; SSE3-NEXT: psrlw $4, %xmm1
33932 ; SSE3-NEXT: paddb %xmm0, %xmm1
33933 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
33934 ; SSE3-NEXT: pxor %xmm0, %xmm0
33935 ; SSE3-NEXT: psadbw %xmm1, %xmm0
33936 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
33937 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33940 ; SSSE3-LABEL: ugt_57_v2i64:
33942 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33943 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
33944 ; SSSE3-NEXT: pand %xmm1, %xmm2
33945 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33946 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
33947 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
33948 ; SSSE3-NEXT: psrlw $4, %xmm0
33949 ; SSSE3-NEXT: pand %xmm1, %xmm0
33950 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
33951 ; SSSE3-NEXT: paddb %xmm4, %xmm3
33952 ; SSSE3-NEXT: pxor %xmm0, %xmm0
33953 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
33954 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
33955 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33958 ; SSE41-LABEL: ugt_57_v2i64:
33960 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33961 ; SSE41-NEXT: movdqa %xmm0, %xmm2
33962 ; SSE41-NEXT: pand %xmm1, %xmm2
33963 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33964 ; SSE41-NEXT: movdqa %xmm3, %xmm4
33965 ; SSE41-NEXT: pshufb %xmm2, %xmm4
33966 ; SSE41-NEXT: psrlw $4, %xmm0
33967 ; SSE41-NEXT: pand %xmm1, %xmm0
33968 ; SSE41-NEXT: pshufb %xmm0, %xmm3
33969 ; SSE41-NEXT: paddb %xmm4, %xmm3
33970 ; SSE41-NEXT: pxor %xmm0, %xmm0
33971 ; SSE41-NEXT: psadbw %xmm3, %xmm0
33972 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
33973 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
33976 ; AVX1-LABEL: ugt_57_v2i64:
33978 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33979 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
33980 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33981 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33982 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
33983 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33984 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
33985 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
33986 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
33987 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
33988 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
33991 ; AVX2-LABEL: ugt_57_v2i64:
33993 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
33994 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
33995 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
33996 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
33997 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
33998 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
33999 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34000 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34001 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
34002 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34003 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34006 ; AVX512VPOPCNTDQ-LABEL: ugt_57_v2i64:
34007 ; AVX512VPOPCNTDQ: # %bb.0:
34008 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34009 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
34010 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34011 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
34012 ; AVX512VPOPCNTDQ-NEXT: retq
34014 ; AVX512VPOPCNTDQVL-LABEL: ugt_57_v2i64:
34015 ; AVX512VPOPCNTDQVL: # %bb.0:
34016 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
34017 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57]
34018 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
34019 ; AVX512VPOPCNTDQVL-NEXT: retq
34021 ; BITALG_NOVLX-LABEL: ugt_57_v2i64:
34022 ; BITALG_NOVLX: # %bb.0:
34023 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34024 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
34025 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
34026 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34027 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34028 ; BITALG_NOVLX-NEXT: vzeroupper
34029 ; BITALG_NOVLX-NEXT: retq
34031 ; BITALG-LABEL: ugt_57_v2i64:
34033 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
34034 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
34035 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34036 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [57,57]
34037 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
34038 ; BITALG-NEXT: retq
; IR under test: ctpop on both lanes, icmp ugt against splat 57, then sext of the mask.
34039 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
34040 %3 = icmp ugt <2 x i64> %2, <i64 57, i64 57>
34041 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_58_v2i64: takes the per-lane population count of a <2 x i64> vector
; (llvm.ctpop.v2i64), compares it unsigned less-than against a splat of 58,
; and sign-extends the <2 x i1> comparison result to a <2 x i64> lane mask.
; The assertion lines below hold the expected llc output for each target
; feature set exercised at the top of the file. They are autogenerated by
; utils/update_llc_test_checks.py and should be regenerated, not hand-edited.
34045 define <2 x i64> @ult_58_v2i64(<2 x i64> %0) {
34046 ; SSE2-LABEL: ult_58_v2i64:
34048 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34049 ; SSE2-NEXT: psrlw $1, %xmm1
34050 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34051 ; SSE2-NEXT: psubb %xmm1, %xmm0
34052 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34053 ; SSE2-NEXT: movdqa %xmm0, %xmm2
34054 ; SSE2-NEXT: pand %xmm1, %xmm2
34055 ; SSE2-NEXT: psrlw $2, %xmm0
34056 ; SSE2-NEXT: pand %xmm1, %xmm0
34057 ; SSE2-NEXT: paddb %xmm2, %xmm0
34058 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34059 ; SSE2-NEXT: psrlw $4, %xmm1
34060 ; SSE2-NEXT: paddb %xmm0, %xmm1
34061 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34062 ; SSE2-NEXT: pxor %xmm0, %xmm0
34063 ; SSE2-NEXT: psadbw %xmm1, %xmm0
34064 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34065 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [58,58,58,58]
34066 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
34069 ; SSE3-LABEL: ult_58_v2i64:
34071 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34072 ; SSE3-NEXT: psrlw $1, %xmm1
34073 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34074 ; SSE3-NEXT: psubb %xmm1, %xmm0
34075 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34076 ; SSE3-NEXT: movdqa %xmm0, %xmm2
34077 ; SSE3-NEXT: pand %xmm1, %xmm2
34078 ; SSE3-NEXT: psrlw $2, %xmm0
34079 ; SSE3-NEXT: pand %xmm1, %xmm0
34080 ; SSE3-NEXT: paddb %xmm2, %xmm0
34081 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34082 ; SSE3-NEXT: psrlw $4, %xmm1
34083 ; SSE3-NEXT: paddb %xmm0, %xmm1
34084 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34085 ; SSE3-NEXT: pxor %xmm0, %xmm0
34086 ; SSE3-NEXT: psadbw %xmm1, %xmm0
34087 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34088 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [58,58,58,58]
34089 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
34092 ; SSSE3-LABEL: ult_58_v2i64:
34094 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34095 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
34096 ; SSSE3-NEXT: pand %xmm1, %xmm2
34097 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34098 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
34099 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
34100 ; SSSE3-NEXT: psrlw $4, %xmm0
34101 ; SSSE3-NEXT: pand %xmm1, %xmm0
34102 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
34103 ; SSSE3-NEXT: paddb %xmm4, %xmm3
34104 ; SSSE3-NEXT: pxor %xmm0, %xmm0
34105 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
34106 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34107 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [58,58,58,58]
34108 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
34111 ; SSE41-LABEL: ult_58_v2i64:
34113 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34114 ; SSE41-NEXT: movdqa %xmm0, %xmm2
34115 ; SSE41-NEXT: pand %xmm1, %xmm2
34116 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34117 ; SSE41-NEXT: movdqa %xmm3, %xmm4
34118 ; SSE41-NEXT: pshufb %xmm2, %xmm4
34119 ; SSE41-NEXT: psrlw $4, %xmm0
34120 ; SSE41-NEXT: pand %xmm1, %xmm0
34121 ; SSE41-NEXT: pshufb %xmm0, %xmm3
34122 ; SSE41-NEXT: paddb %xmm4, %xmm3
34123 ; SSE41-NEXT: pxor %xmm0, %xmm0
34124 ; SSE41-NEXT: psadbw %xmm3, %xmm0
34125 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34126 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [58,58,58,58]
34127 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
34130 ; AVX1-LABEL: ult_58_v2i64:
34132 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34133 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
34134 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34135 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34136 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
34137 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
34138 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34139 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34140 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
34141 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34142 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [58,58]
34143 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34146 ; AVX2-LABEL: ult_58_v2i64:
34148 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34149 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
34150 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34151 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34152 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
34153 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
34154 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34155 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34156 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
34157 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34158 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [58,58]
34159 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34162 ; AVX512VPOPCNTDQ-LABEL: ult_58_v2i64:
34163 ; AVX512VPOPCNTDQ: # %bb.0:
34164 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34165 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
34166 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [58,58]
34167 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34168 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
34169 ; AVX512VPOPCNTDQ-NEXT: retq
34171 ; AVX512VPOPCNTDQVL-LABEL: ult_58_v2i64:
34172 ; AVX512VPOPCNTDQVL: # %bb.0:
34173 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
34174 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58]
34175 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34176 ; AVX512VPOPCNTDQVL-NEXT: retq
34178 ; BITALG_NOVLX-LABEL: ult_58_v2i64:
34179 ; BITALG_NOVLX: # %bb.0:
34180 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34181 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
34182 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
34183 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34184 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [58,58]
34185 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34186 ; BITALG_NOVLX-NEXT: vzeroupper
34187 ; BITALG_NOVLX-NEXT: retq
34189 ; BITALG-LABEL: ult_58_v2i64:
34191 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
34192 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
34193 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34194 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58]
34195 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34196 ; BITALG-NEXT: retq
34197 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
34198 %3 = icmp ult <2 x i64> %2, <i64 58, i64 58>
34199 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_58_v2i64: takes the per-lane population count of a <2 x i64> vector
; (llvm.ctpop.v2i64), compares it unsigned greater-than against a splat of 58,
; and sign-extends the <2 x i1> comparison result to a <2 x i64> lane mask.
; The assertion lines below hold the expected llc output for each target
; feature set exercised at the top of the file. They are autogenerated by
; utils/update_llc_test_checks.py and should be regenerated, not hand-edited.
34203 define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) {
34204 ; SSE2-LABEL: ugt_58_v2i64:
34206 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34207 ; SSE2-NEXT: psrlw $1, %xmm1
34208 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34209 ; SSE2-NEXT: psubb %xmm1, %xmm0
34210 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34211 ; SSE2-NEXT: movdqa %xmm0, %xmm2
34212 ; SSE2-NEXT: pand %xmm1, %xmm2
34213 ; SSE2-NEXT: psrlw $2, %xmm0
34214 ; SSE2-NEXT: pand %xmm1, %xmm0
34215 ; SSE2-NEXT: paddb %xmm2, %xmm0
34216 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34217 ; SSE2-NEXT: psrlw $4, %xmm1
34218 ; SSE2-NEXT: paddb %xmm0, %xmm1
34219 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34220 ; SSE2-NEXT: pxor %xmm0, %xmm0
34221 ; SSE2-NEXT: psadbw %xmm1, %xmm0
34222 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
34223 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
34226 ; SSE3-LABEL: ugt_58_v2i64:
34228 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34229 ; SSE3-NEXT: psrlw $1, %xmm1
34230 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34231 ; SSE3-NEXT: psubb %xmm1, %xmm0
34232 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34233 ; SSE3-NEXT: movdqa %xmm0, %xmm2
34234 ; SSE3-NEXT: pand %xmm1, %xmm2
34235 ; SSE3-NEXT: psrlw $2, %xmm0
34236 ; SSE3-NEXT: pand %xmm1, %xmm0
34237 ; SSE3-NEXT: paddb %xmm2, %xmm0
34238 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34239 ; SSE3-NEXT: psrlw $4, %xmm1
34240 ; SSE3-NEXT: paddb %xmm0, %xmm1
34241 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34242 ; SSE3-NEXT: pxor %xmm0, %xmm0
34243 ; SSE3-NEXT: psadbw %xmm1, %xmm0
34244 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
34245 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
34248 ; SSSE3-LABEL: ugt_58_v2i64:
34250 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34251 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
34252 ; SSSE3-NEXT: pand %xmm1, %xmm2
34253 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34254 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
34255 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
34256 ; SSSE3-NEXT: psrlw $4, %xmm0
34257 ; SSSE3-NEXT: pand %xmm1, %xmm0
34258 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
34259 ; SSSE3-NEXT: paddb %xmm4, %xmm3
34260 ; SSSE3-NEXT: pxor %xmm0, %xmm0
34261 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
34262 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
34263 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
34266 ; SSE41-LABEL: ugt_58_v2i64:
34268 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34269 ; SSE41-NEXT: movdqa %xmm0, %xmm2
34270 ; SSE41-NEXT: pand %xmm1, %xmm2
34271 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34272 ; SSE41-NEXT: movdqa %xmm3, %xmm4
34273 ; SSE41-NEXT: pshufb %xmm2, %xmm4
34274 ; SSE41-NEXT: psrlw $4, %xmm0
34275 ; SSE41-NEXT: pand %xmm1, %xmm0
34276 ; SSE41-NEXT: pshufb %xmm0, %xmm3
34277 ; SSE41-NEXT: paddb %xmm4, %xmm3
34278 ; SSE41-NEXT: pxor %xmm0, %xmm0
34279 ; SSE41-NEXT: psadbw %xmm3, %xmm0
34280 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
34281 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
34284 ; AVX1-LABEL: ugt_58_v2i64:
34286 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34287 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
34288 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34289 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34290 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
34291 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
34292 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34293 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34294 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
34295 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34296 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34299 ; AVX2-LABEL: ugt_58_v2i64:
34301 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34302 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
34303 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34304 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34305 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
34306 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
34307 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34308 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34309 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
34310 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34311 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34314 ; AVX512VPOPCNTDQ-LABEL: ugt_58_v2i64:
34315 ; AVX512VPOPCNTDQ: # %bb.0:
34316 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34317 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
34318 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34319 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
34320 ; AVX512VPOPCNTDQ-NEXT: retq
34322 ; AVX512VPOPCNTDQVL-LABEL: ugt_58_v2i64:
34323 ; AVX512VPOPCNTDQVL: # %bb.0:
34324 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
34325 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58]
34326 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
34327 ; AVX512VPOPCNTDQVL-NEXT: retq
34329 ; BITALG_NOVLX-LABEL: ugt_58_v2i64:
34330 ; BITALG_NOVLX: # %bb.0:
34331 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34332 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
34333 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
34334 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34335 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34336 ; BITALG_NOVLX-NEXT: vzeroupper
34337 ; BITALG_NOVLX-NEXT: retq
34339 ; BITALG-LABEL: ugt_58_v2i64:
34341 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
34342 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
34343 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34344 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [58,58]
34345 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
34346 ; BITALG-NEXT: retq
34347 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
34348 %3 = icmp ugt <2 x i64> %2, <i64 58, i64 58>
34349 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_59_v2i64: takes the per-lane population count of a <2 x i64> vector
; (llvm.ctpop.v2i64), compares it unsigned less-than against a splat of 59,
; and sign-extends the <2 x i1> comparison result to a <2 x i64> lane mask.
; The assertion lines below hold the expected llc output for each target
; feature set exercised at the top of the file. They are autogenerated by
; utils/update_llc_test_checks.py and should be regenerated, not hand-edited.
34353 define <2 x i64> @ult_59_v2i64(<2 x i64> %0) {
34354 ; SSE2-LABEL: ult_59_v2i64:
34356 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34357 ; SSE2-NEXT: psrlw $1, %xmm1
34358 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34359 ; SSE2-NEXT: psubb %xmm1, %xmm0
34360 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34361 ; SSE2-NEXT: movdqa %xmm0, %xmm2
34362 ; SSE2-NEXT: pand %xmm1, %xmm2
34363 ; SSE2-NEXT: psrlw $2, %xmm0
34364 ; SSE2-NEXT: pand %xmm1, %xmm0
34365 ; SSE2-NEXT: paddb %xmm2, %xmm0
34366 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34367 ; SSE2-NEXT: psrlw $4, %xmm1
34368 ; SSE2-NEXT: paddb %xmm0, %xmm1
34369 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34370 ; SSE2-NEXT: pxor %xmm0, %xmm0
34371 ; SSE2-NEXT: psadbw %xmm1, %xmm0
34372 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34373 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [59,59,59,59]
34374 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
34377 ; SSE3-LABEL: ult_59_v2i64:
34379 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34380 ; SSE3-NEXT: psrlw $1, %xmm1
34381 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34382 ; SSE3-NEXT: psubb %xmm1, %xmm0
34383 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34384 ; SSE3-NEXT: movdqa %xmm0, %xmm2
34385 ; SSE3-NEXT: pand %xmm1, %xmm2
34386 ; SSE3-NEXT: psrlw $2, %xmm0
34387 ; SSE3-NEXT: pand %xmm1, %xmm0
34388 ; SSE3-NEXT: paddb %xmm2, %xmm0
34389 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34390 ; SSE3-NEXT: psrlw $4, %xmm1
34391 ; SSE3-NEXT: paddb %xmm0, %xmm1
34392 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34393 ; SSE3-NEXT: pxor %xmm0, %xmm0
34394 ; SSE3-NEXT: psadbw %xmm1, %xmm0
34395 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34396 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [59,59,59,59]
34397 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
34400 ; SSSE3-LABEL: ult_59_v2i64:
34402 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34403 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
34404 ; SSSE3-NEXT: pand %xmm1, %xmm2
34405 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34406 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
34407 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
34408 ; SSSE3-NEXT: psrlw $4, %xmm0
34409 ; SSSE3-NEXT: pand %xmm1, %xmm0
34410 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
34411 ; SSSE3-NEXT: paddb %xmm4, %xmm3
34412 ; SSSE3-NEXT: pxor %xmm0, %xmm0
34413 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
34414 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34415 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [59,59,59,59]
34416 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
34419 ; SSE41-LABEL: ult_59_v2i64:
34421 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34422 ; SSE41-NEXT: movdqa %xmm0, %xmm2
34423 ; SSE41-NEXT: pand %xmm1, %xmm2
34424 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34425 ; SSE41-NEXT: movdqa %xmm3, %xmm4
34426 ; SSE41-NEXT: pshufb %xmm2, %xmm4
34427 ; SSE41-NEXT: psrlw $4, %xmm0
34428 ; SSE41-NEXT: pand %xmm1, %xmm0
34429 ; SSE41-NEXT: pshufb %xmm0, %xmm3
34430 ; SSE41-NEXT: paddb %xmm4, %xmm3
34431 ; SSE41-NEXT: pxor %xmm0, %xmm0
34432 ; SSE41-NEXT: psadbw %xmm3, %xmm0
34433 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34434 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [59,59,59,59]
34435 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
34438 ; AVX1-LABEL: ult_59_v2i64:
34440 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34441 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
34442 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34443 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34444 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
34445 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
34446 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34447 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34448 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
34449 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34450 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [59,59]
34451 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34454 ; AVX2-LABEL: ult_59_v2i64:
34456 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34457 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
34458 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34459 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34460 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
34461 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
34462 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34463 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34464 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
34465 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34466 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [59,59]
34467 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34470 ; AVX512VPOPCNTDQ-LABEL: ult_59_v2i64:
34471 ; AVX512VPOPCNTDQ: # %bb.0:
34472 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34473 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
34474 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [59,59]
34475 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34476 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
34477 ; AVX512VPOPCNTDQ-NEXT: retq
34479 ; AVX512VPOPCNTDQVL-LABEL: ult_59_v2i64:
34480 ; AVX512VPOPCNTDQVL: # %bb.0:
34481 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
34482 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59]
34483 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34484 ; AVX512VPOPCNTDQVL-NEXT: retq
34486 ; BITALG_NOVLX-LABEL: ult_59_v2i64:
34487 ; BITALG_NOVLX: # %bb.0:
34488 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34489 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
34490 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
34491 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34492 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [59,59]
34493 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34494 ; BITALG_NOVLX-NEXT: vzeroupper
34495 ; BITALG_NOVLX-NEXT: retq
34497 ; BITALG-LABEL: ult_59_v2i64:
34499 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
34500 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
34501 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34502 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59]
34503 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34504 ; BITALG-NEXT: retq
34505 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
34506 %3 = icmp ult <2 x i64> %2, <i64 59, i64 59>
34507 %4 = sext <2 x i1> %3 to <2 x i64>
; ugt_59_v2i64: takes the per-lane population count of a <2 x i64> vector
; (llvm.ctpop.v2i64), compares it unsigned greater-than against a splat of 59,
; and sign-extends the <2 x i1> comparison result to a <2 x i64> lane mask.
; The assertion lines below hold the expected llc output for each target
; feature set exercised at the top of the file. They are autogenerated by
; utils/update_llc_test_checks.py and should be regenerated, not hand-edited.
34511 define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) {
34512 ; SSE2-LABEL: ugt_59_v2i64:
34514 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34515 ; SSE2-NEXT: psrlw $1, %xmm1
34516 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34517 ; SSE2-NEXT: psubb %xmm1, %xmm0
34518 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34519 ; SSE2-NEXT: movdqa %xmm0, %xmm2
34520 ; SSE2-NEXT: pand %xmm1, %xmm2
34521 ; SSE2-NEXT: psrlw $2, %xmm0
34522 ; SSE2-NEXT: pand %xmm1, %xmm0
34523 ; SSE2-NEXT: paddb %xmm2, %xmm0
34524 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34525 ; SSE2-NEXT: psrlw $4, %xmm1
34526 ; SSE2-NEXT: paddb %xmm0, %xmm1
34527 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34528 ; SSE2-NEXT: pxor %xmm0, %xmm0
34529 ; SSE2-NEXT: psadbw %xmm1, %xmm0
34530 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
34531 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
34534 ; SSE3-LABEL: ugt_59_v2i64:
34536 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34537 ; SSE3-NEXT: psrlw $1, %xmm1
34538 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34539 ; SSE3-NEXT: psubb %xmm1, %xmm0
34540 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34541 ; SSE3-NEXT: movdqa %xmm0, %xmm2
34542 ; SSE3-NEXT: pand %xmm1, %xmm2
34543 ; SSE3-NEXT: psrlw $2, %xmm0
34544 ; SSE3-NEXT: pand %xmm1, %xmm0
34545 ; SSE3-NEXT: paddb %xmm2, %xmm0
34546 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34547 ; SSE3-NEXT: psrlw $4, %xmm1
34548 ; SSE3-NEXT: paddb %xmm0, %xmm1
34549 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34550 ; SSE3-NEXT: pxor %xmm0, %xmm0
34551 ; SSE3-NEXT: psadbw %xmm1, %xmm0
34552 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
34553 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
34556 ; SSSE3-LABEL: ugt_59_v2i64:
34558 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34559 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
34560 ; SSSE3-NEXT: pand %xmm1, %xmm2
34561 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34562 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
34563 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
34564 ; SSSE3-NEXT: psrlw $4, %xmm0
34565 ; SSSE3-NEXT: pand %xmm1, %xmm0
34566 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
34567 ; SSSE3-NEXT: paddb %xmm4, %xmm3
34568 ; SSSE3-NEXT: pxor %xmm0, %xmm0
34569 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
34570 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
34571 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
34574 ; SSE41-LABEL: ugt_59_v2i64:
34576 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34577 ; SSE41-NEXT: movdqa %xmm0, %xmm2
34578 ; SSE41-NEXT: pand %xmm1, %xmm2
34579 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34580 ; SSE41-NEXT: movdqa %xmm3, %xmm4
34581 ; SSE41-NEXT: pshufb %xmm2, %xmm4
34582 ; SSE41-NEXT: psrlw $4, %xmm0
34583 ; SSE41-NEXT: pand %xmm1, %xmm0
34584 ; SSE41-NEXT: pshufb %xmm0, %xmm3
34585 ; SSE41-NEXT: paddb %xmm4, %xmm3
34586 ; SSE41-NEXT: pxor %xmm0, %xmm0
34587 ; SSE41-NEXT: psadbw %xmm3, %xmm0
34588 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
34589 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
34592 ; AVX1-LABEL: ugt_59_v2i64:
34594 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34595 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
34596 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34597 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34598 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
34599 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
34600 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34601 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34602 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
34603 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34604 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34607 ; AVX2-LABEL: ugt_59_v2i64:
34609 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34610 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
34611 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34612 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34613 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
34614 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
34615 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34616 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34617 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
34618 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34619 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34622 ; AVX512VPOPCNTDQ-LABEL: ugt_59_v2i64:
34623 ; AVX512VPOPCNTDQ: # %bb.0:
34624 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34625 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
34626 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34627 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
34628 ; AVX512VPOPCNTDQ-NEXT: retq
34630 ; AVX512VPOPCNTDQVL-LABEL: ugt_59_v2i64:
34631 ; AVX512VPOPCNTDQVL: # %bb.0:
34632 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
34633 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59]
34634 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
34635 ; AVX512VPOPCNTDQVL-NEXT: retq
34637 ; BITALG_NOVLX-LABEL: ugt_59_v2i64:
34638 ; BITALG_NOVLX: # %bb.0:
34639 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34640 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
34641 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
34642 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34643 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34644 ; BITALG_NOVLX-NEXT: vzeroupper
34645 ; BITALG_NOVLX-NEXT: retq
34647 ; BITALG-LABEL: ugt_59_v2i64:
34649 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
34650 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
34651 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34652 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [59,59]
34653 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
34654 ; BITALG-NEXT: retq
34655 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
34656 %3 = icmp ugt <2 x i64> %2, <i64 59, i64 59>
34657 %4 = sext <2 x i1> %3 to <2 x i64>
; ult_60_v2i64: takes the per-lane population count of a <2 x i64> vector
; (llvm.ctpop.v2i64), compares it unsigned less-than against a splat of 60,
; and sign-extends the <2 x i1> comparison result to a <2 x i64> lane mask.
; The assertion lines below hold the expected llc output for each target
; feature set exercised at the top of the file. They are autogenerated by
; utils/update_llc_test_checks.py and should be regenerated, not hand-edited.
34661 define <2 x i64> @ult_60_v2i64(<2 x i64> %0) {
34662 ; SSE2-LABEL: ult_60_v2i64:
34664 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34665 ; SSE2-NEXT: psrlw $1, %xmm1
34666 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34667 ; SSE2-NEXT: psubb %xmm1, %xmm0
34668 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34669 ; SSE2-NEXT: movdqa %xmm0, %xmm2
34670 ; SSE2-NEXT: pand %xmm1, %xmm2
34671 ; SSE2-NEXT: psrlw $2, %xmm0
34672 ; SSE2-NEXT: pand %xmm1, %xmm0
34673 ; SSE2-NEXT: paddb %xmm2, %xmm0
34674 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34675 ; SSE2-NEXT: psrlw $4, %xmm1
34676 ; SSE2-NEXT: paddb %xmm0, %xmm1
34677 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34678 ; SSE2-NEXT: pxor %xmm0, %xmm0
34679 ; SSE2-NEXT: psadbw %xmm1, %xmm0
34680 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34681 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [60,60,60,60]
34682 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
34685 ; SSE3-LABEL: ult_60_v2i64:
34687 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34688 ; SSE3-NEXT: psrlw $1, %xmm1
34689 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34690 ; SSE3-NEXT: psubb %xmm1, %xmm0
34691 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34692 ; SSE3-NEXT: movdqa %xmm0, %xmm2
34693 ; SSE3-NEXT: pand %xmm1, %xmm2
34694 ; SSE3-NEXT: psrlw $2, %xmm0
34695 ; SSE3-NEXT: pand %xmm1, %xmm0
34696 ; SSE3-NEXT: paddb %xmm2, %xmm0
34697 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34698 ; SSE3-NEXT: psrlw $4, %xmm1
34699 ; SSE3-NEXT: paddb %xmm0, %xmm1
34700 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34701 ; SSE3-NEXT: pxor %xmm0, %xmm0
34702 ; SSE3-NEXT: psadbw %xmm1, %xmm0
34703 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34704 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [60,60,60,60]
34705 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
34708 ; SSSE3-LABEL: ult_60_v2i64:
34710 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34711 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
34712 ; SSSE3-NEXT: pand %xmm1, %xmm2
34713 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34714 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
34715 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
34716 ; SSSE3-NEXT: psrlw $4, %xmm0
34717 ; SSSE3-NEXT: pand %xmm1, %xmm0
34718 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
34719 ; SSSE3-NEXT: paddb %xmm4, %xmm3
34720 ; SSSE3-NEXT: pxor %xmm0, %xmm0
34721 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
34722 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34723 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [60,60,60,60]
34724 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
34727 ; SSE41-LABEL: ult_60_v2i64:
34729 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34730 ; SSE41-NEXT: movdqa %xmm0, %xmm2
34731 ; SSE41-NEXT: pand %xmm1, %xmm2
34732 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34733 ; SSE41-NEXT: movdqa %xmm3, %xmm4
34734 ; SSE41-NEXT: pshufb %xmm2, %xmm4
34735 ; SSE41-NEXT: psrlw $4, %xmm0
34736 ; SSE41-NEXT: pand %xmm1, %xmm0
34737 ; SSE41-NEXT: pshufb %xmm0, %xmm3
34738 ; SSE41-NEXT: paddb %xmm4, %xmm3
34739 ; SSE41-NEXT: pxor %xmm0, %xmm0
34740 ; SSE41-NEXT: psadbw %xmm3, %xmm0
34741 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34742 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [60,60,60,60]
34743 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
34746 ; AVX1-LABEL: ult_60_v2i64:
34748 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34749 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
34750 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34751 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34752 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
34753 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
34754 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34755 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34756 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
34757 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34758 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [60,60]
34759 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34762 ; AVX2-LABEL: ult_60_v2i64:
34764 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34765 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
34766 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34767 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34768 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
34769 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
34770 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34771 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34772 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
34773 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34774 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [60,60]
34775 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34778 ; AVX512VPOPCNTDQ-LABEL: ult_60_v2i64:
34779 ; AVX512VPOPCNTDQ: # %bb.0:
34780 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34781 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
34782 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [60,60]
34783 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34784 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
34785 ; AVX512VPOPCNTDQ-NEXT: retq
34787 ; AVX512VPOPCNTDQVL-LABEL: ult_60_v2i64:
34788 ; AVX512VPOPCNTDQVL: # %bb.0:
34789 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
34790 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60]
34791 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34792 ; AVX512VPOPCNTDQVL-NEXT: retq
34794 ; BITALG_NOVLX-LABEL: ult_60_v2i64:
34795 ; BITALG_NOVLX: # %bb.0:
34796 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34797 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
34798 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
34799 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34800 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [60,60]
34801 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34802 ; BITALG_NOVLX-NEXT: vzeroupper
34803 ; BITALG_NOVLX-NEXT: retq
34805 ; BITALG-LABEL: ult_60_v2i64:
34807 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
34808 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
34809 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34810 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60]
34811 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
34812 ; BITALG-NEXT: retq
34813 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
34814 %3 = icmp ult <2 x i64> %2, <i64 60, i64 60>
34815 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test for a vector popcount threshold compare: the IR at the end of
; this function takes ctpop of the <2 x i64> argument, tests each lane with
; icmp ugt against 60, and sign-extends the <2 x i1> result to <2 x i64>.
; Expected lowering, as recorded below: configurations without a vector
; popcount instruction build per-byte counts and sum them per 64-bit lane with
; (v)psadbw against a zeroed register; avx512vpopcntdq expects vpopcntq;
; avx512bitalg expects vpopcntb followed by vpsadbw. The final compare is a
; (v)pcmpgt with the popcount as the greater-than side and the constant as the
; other operand.
; The expected-assembly lines are autogenerated (see the note on the first
; line of this file); regenerate them with the update script, do not hand-edit.
34819 define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) {
34820 ; SSE2-LABEL: ugt_60_v2i64:
34822 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34823 ; SSE2-NEXT: psrlw $1, %xmm1
34824 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34825 ; SSE2-NEXT: psubb %xmm1, %xmm0
34826 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34827 ; SSE2-NEXT: movdqa %xmm0, %xmm2
34828 ; SSE2-NEXT: pand %xmm1, %xmm2
34829 ; SSE2-NEXT: psrlw $2, %xmm0
34830 ; SSE2-NEXT: pand %xmm1, %xmm0
34831 ; SSE2-NEXT: paddb %xmm2, %xmm0
34832 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34833 ; SSE2-NEXT: psrlw $4, %xmm1
34834 ; SSE2-NEXT: paddb %xmm0, %xmm1
34835 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34836 ; SSE2-NEXT: pxor %xmm0, %xmm0
34837 ; SSE2-NEXT: psadbw %xmm1, %xmm0
34838 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
34839 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
34842 ; SSE3-LABEL: ugt_60_v2i64:
34844 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34845 ; SSE3-NEXT: psrlw $1, %xmm1
34846 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34847 ; SSE3-NEXT: psubb %xmm1, %xmm0
34848 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34849 ; SSE3-NEXT: movdqa %xmm0, %xmm2
34850 ; SSE3-NEXT: pand %xmm1, %xmm2
34851 ; SSE3-NEXT: psrlw $2, %xmm0
34852 ; SSE3-NEXT: pand %xmm1, %xmm0
34853 ; SSE3-NEXT: paddb %xmm2, %xmm0
34854 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34855 ; SSE3-NEXT: psrlw $4, %xmm1
34856 ; SSE3-NEXT: paddb %xmm0, %xmm1
34857 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34858 ; SSE3-NEXT: pxor %xmm0, %xmm0
34859 ; SSE3-NEXT: psadbw %xmm1, %xmm0
34860 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
34861 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
34864 ; SSSE3-LABEL: ugt_60_v2i64:
34866 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34867 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
34868 ; SSSE3-NEXT: pand %xmm1, %xmm2
34869 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34870 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
34871 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
34872 ; SSSE3-NEXT: psrlw $4, %xmm0
34873 ; SSSE3-NEXT: pand %xmm1, %xmm0
34874 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
34875 ; SSSE3-NEXT: paddb %xmm4, %xmm3
34876 ; SSSE3-NEXT: pxor %xmm0, %xmm0
34877 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
34878 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
34879 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
34882 ; SSE41-LABEL: ugt_60_v2i64:
34884 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34885 ; SSE41-NEXT: movdqa %xmm0, %xmm2
34886 ; SSE41-NEXT: pand %xmm1, %xmm2
34887 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34888 ; SSE41-NEXT: movdqa %xmm3, %xmm4
34889 ; SSE41-NEXT: pshufb %xmm2, %xmm4
34890 ; SSE41-NEXT: psrlw $4, %xmm0
34891 ; SSE41-NEXT: pand %xmm1, %xmm0
34892 ; SSE41-NEXT: pshufb %xmm0, %xmm3
34893 ; SSE41-NEXT: paddb %xmm4, %xmm3
34894 ; SSE41-NEXT: pxor %xmm0, %xmm0
34895 ; SSE41-NEXT: psadbw %xmm3, %xmm0
34896 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
34897 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
34900 ; AVX1-LABEL: ugt_60_v2i64:
34902 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34903 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
34904 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34905 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34906 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
34907 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
34908 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34909 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34910 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
34911 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34912 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34915 ; AVX2-LABEL: ugt_60_v2i64:
34917 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
34918 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
34919 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
34920 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
34921 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
34922 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
34923 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
34924 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
34925 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
34926 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34927 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34930 ; AVX512VPOPCNTDQ-LABEL: ugt_60_v2i64:
34931 ; AVX512VPOPCNTDQ: # %bb.0:
34932 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34933 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
34934 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34935 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
34936 ; AVX512VPOPCNTDQ-NEXT: retq
34938 ; AVX512VPOPCNTDQVL-LABEL: ugt_60_v2i64:
34939 ; AVX512VPOPCNTDQVL: # %bb.0:
34940 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
34941 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60]
34942 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
34943 ; AVX512VPOPCNTDQVL-NEXT: retq
34945 ; BITALG_NOVLX-LABEL: ugt_60_v2i64:
34946 ; BITALG_NOVLX: # %bb.0:
34947 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34948 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
34949 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
34950 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34951 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
34952 ; BITALG_NOVLX-NEXT: vzeroupper
34953 ; BITALG_NOVLX-NEXT: retq
34955 ; BITALG-LABEL: ugt_60_v2i64:
34957 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
34958 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
34959 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
34960 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [60,60]
34961 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
34962 ; BITALG-NEXT: retq
; Per-lane population count of the input vector.
34963 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
; Unsigned per-lane test: popcount > 60.
34964 %3 = icmp ugt <2 x i64> %2, <i64 60, i64 60>
; Sign-extend each i1 so a true lane becomes all-ones and a false lane zero.
34965 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test for a vector popcount threshold compare: the IR at the end of
; this function takes ctpop of the <2 x i64> argument, tests each lane with
; icmp ult against 61, and sign-extends the <2 x i1> result to <2 x i64>.
; Expected lowering, as recorded below: configurations without a vector
; popcount instruction build per-byte counts and sum them per 64-bit lane with
; (v)psadbw against a zeroed register; avx512vpopcntdq expects vpopcntq;
; avx512bitalg expects vpopcntb followed by vpsadbw. The constant 61 is
; materialized in a register and used as the greater-than side of the final
; (v)pcmpgt, i.e. the check is "61 > popcount".
; The expected-assembly lines are autogenerated (see the note on the first
; line of this file); regenerate them with the update script, do not hand-edit.
34969 define <2 x i64> @ult_61_v2i64(<2 x i64> %0) {
34970 ; SSE2-LABEL: ult_61_v2i64:
34972 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34973 ; SSE2-NEXT: psrlw $1, %xmm1
34974 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34975 ; SSE2-NEXT: psubb %xmm1, %xmm0
34976 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
34977 ; SSE2-NEXT: movdqa %xmm0, %xmm2
34978 ; SSE2-NEXT: pand %xmm1, %xmm2
34979 ; SSE2-NEXT: psrlw $2, %xmm0
34980 ; SSE2-NEXT: pand %xmm1, %xmm0
34981 ; SSE2-NEXT: paddb %xmm2, %xmm0
34982 ; SSE2-NEXT: movdqa %xmm0, %xmm1
34983 ; SSE2-NEXT: psrlw $4, %xmm1
34984 ; SSE2-NEXT: paddb %xmm0, %xmm1
34985 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34986 ; SSE2-NEXT: pxor %xmm0, %xmm0
34987 ; SSE2-NEXT: psadbw %xmm1, %xmm0
34988 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
34989 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [61,61,61,61]
34990 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
34993 ; SSE3-LABEL: ult_61_v2i64:
34995 ; SSE3-NEXT: movdqa %xmm0, %xmm1
34996 ; SSE3-NEXT: psrlw $1, %xmm1
34997 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
34998 ; SSE3-NEXT: psubb %xmm1, %xmm0
34999 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35000 ; SSE3-NEXT: movdqa %xmm0, %xmm2
35001 ; SSE3-NEXT: pand %xmm1, %xmm2
35002 ; SSE3-NEXT: psrlw $2, %xmm0
35003 ; SSE3-NEXT: pand %xmm1, %xmm0
35004 ; SSE3-NEXT: paddb %xmm2, %xmm0
35005 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35006 ; SSE3-NEXT: psrlw $4, %xmm1
35007 ; SSE3-NEXT: paddb %xmm0, %xmm1
35008 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35009 ; SSE3-NEXT: pxor %xmm0, %xmm0
35010 ; SSE3-NEXT: psadbw %xmm1, %xmm0
35011 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
35012 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [61,61,61,61]
35013 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
35016 ; SSSE3-LABEL: ult_61_v2i64:
35018 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35019 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
35020 ; SSSE3-NEXT: pand %xmm1, %xmm2
35021 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35022 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
35023 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
35024 ; SSSE3-NEXT: psrlw $4, %xmm0
35025 ; SSSE3-NEXT: pand %xmm1, %xmm0
35026 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
35027 ; SSSE3-NEXT: paddb %xmm4, %xmm3
35028 ; SSSE3-NEXT: pxor %xmm0, %xmm0
35029 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
35030 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
35031 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [61,61,61,61]
35032 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
35035 ; SSE41-LABEL: ult_61_v2i64:
35037 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35038 ; SSE41-NEXT: movdqa %xmm0, %xmm2
35039 ; SSE41-NEXT: pand %xmm1, %xmm2
35040 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35041 ; SSE41-NEXT: movdqa %xmm3, %xmm4
35042 ; SSE41-NEXT: pshufb %xmm2, %xmm4
35043 ; SSE41-NEXT: psrlw $4, %xmm0
35044 ; SSE41-NEXT: pand %xmm1, %xmm0
35045 ; SSE41-NEXT: pshufb %xmm0, %xmm3
35046 ; SSE41-NEXT: paddb %xmm4, %xmm3
35047 ; SSE41-NEXT: pxor %xmm0, %xmm0
35048 ; SSE41-NEXT: psadbw %xmm3, %xmm0
35049 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
35050 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [61,61,61,61]
35051 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
35054 ; AVX1-LABEL: ult_61_v2i64:
35056 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35057 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
35058 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35059 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35060 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
35061 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
35062 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35063 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35064 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
35065 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35066 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [61,61]
35067 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35070 ; AVX2-LABEL: ult_61_v2i64:
35072 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35073 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
35074 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35075 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35076 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
35077 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
35078 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35079 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35080 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
35081 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35082 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [61,61]
35083 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35086 ; AVX512VPOPCNTDQ-LABEL: ult_61_v2i64:
35087 ; AVX512VPOPCNTDQ: # %bb.0:
35088 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35089 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
35090 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [61,61]
35091 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35092 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
35093 ; AVX512VPOPCNTDQ-NEXT: retq
35095 ; AVX512VPOPCNTDQVL-LABEL: ult_61_v2i64:
35096 ; AVX512VPOPCNTDQVL: # %bb.0:
35097 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
35098 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61]
35099 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35100 ; AVX512VPOPCNTDQVL-NEXT: retq
35102 ; BITALG_NOVLX-LABEL: ult_61_v2i64:
35103 ; BITALG_NOVLX: # %bb.0:
35104 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35105 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
35106 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
35107 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35108 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [61,61]
35109 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35110 ; BITALG_NOVLX-NEXT: vzeroupper
35111 ; BITALG_NOVLX-NEXT: retq
35113 ; BITALG-LABEL: ult_61_v2i64:
35115 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
35116 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
35117 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35118 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61]
35119 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35120 ; BITALG-NEXT: retq
; Per-lane population count of the input vector.
35121 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
; Unsigned per-lane test: popcount < 61.
35122 %3 = icmp ult <2 x i64> %2, <i64 61, i64 61>
; Sign-extend each i1 so a true lane becomes all-ones and a false lane zero.
35123 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test for a vector popcount threshold compare: the IR at the end of
; this function takes ctpop of the <2 x i64> argument, tests each lane with
; icmp ugt against 61, and sign-extends the <2 x i1> result to <2 x i64>.
; Expected lowering, as recorded below: configurations without a vector
; popcount instruction build per-byte counts and sum them per 64-bit lane with
; (v)psadbw against a zeroed register; avx512vpopcntdq expects vpopcntq;
; avx512bitalg expects vpopcntb followed by vpsadbw. The final compare is a
; (v)pcmpgt with the popcount as the greater-than side and the constant as the
; other operand.
; The expected-assembly lines are autogenerated (see the note on the first
; line of this file); regenerate them with the update script, do not hand-edit.
35127 define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) {
35128 ; SSE2-LABEL: ugt_61_v2i64:
35130 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35131 ; SSE2-NEXT: psrlw $1, %xmm1
35132 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35133 ; SSE2-NEXT: psubb %xmm1, %xmm0
35134 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35135 ; SSE2-NEXT: movdqa %xmm0, %xmm2
35136 ; SSE2-NEXT: pand %xmm1, %xmm2
35137 ; SSE2-NEXT: psrlw $2, %xmm0
35138 ; SSE2-NEXT: pand %xmm1, %xmm0
35139 ; SSE2-NEXT: paddb %xmm2, %xmm0
35140 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35141 ; SSE2-NEXT: psrlw $4, %xmm1
35142 ; SSE2-NEXT: paddb %xmm0, %xmm1
35143 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35144 ; SSE2-NEXT: pxor %xmm0, %xmm0
35145 ; SSE2-NEXT: psadbw %xmm1, %xmm0
35146 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
35147 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
35150 ; SSE3-LABEL: ugt_61_v2i64:
35152 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35153 ; SSE3-NEXT: psrlw $1, %xmm1
35154 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35155 ; SSE3-NEXT: psubb %xmm1, %xmm0
35156 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35157 ; SSE3-NEXT: movdqa %xmm0, %xmm2
35158 ; SSE3-NEXT: pand %xmm1, %xmm2
35159 ; SSE3-NEXT: psrlw $2, %xmm0
35160 ; SSE3-NEXT: pand %xmm1, %xmm0
35161 ; SSE3-NEXT: paddb %xmm2, %xmm0
35162 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35163 ; SSE3-NEXT: psrlw $4, %xmm1
35164 ; SSE3-NEXT: paddb %xmm0, %xmm1
35165 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35166 ; SSE3-NEXT: pxor %xmm0, %xmm0
35167 ; SSE3-NEXT: psadbw %xmm1, %xmm0
35168 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
35169 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
35172 ; SSSE3-LABEL: ugt_61_v2i64:
35174 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35175 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
35176 ; SSSE3-NEXT: pand %xmm1, %xmm2
35177 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35178 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
35179 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
35180 ; SSSE3-NEXT: psrlw $4, %xmm0
35181 ; SSSE3-NEXT: pand %xmm1, %xmm0
35182 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
35183 ; SSSE3-NEXT: paddb %xmm4, %xmm3
35184 ; SSSE3-NEXT: pxor %xmm0, %xmm0
35185 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
35186 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
35187 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
35190 ; SSE41-LABEL: ugt_61_v2i64:
35192 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35193 ; SSE41-NEXT: movdqa %xmm0, %xmm2
35194 ; SSE41-NEXT: pand %xmm1, %xmm2
35195 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35196 ; SSE41-NEXT: movdqa %xmm3, %xmm4
35197 ; SSE41-NEXT: pshufb %xmm2, %xmm4
35198 ; SSE41-NEXT: psrlw $4, %xmm0
35199 ; SSE41-NEXT: pand %xmm1, %xmm0
35200 ; SSE41-NEXT: pshufb %xmm0, %xmm3
35201 ; SSE41-NEXT: paddb %xmm4, %xmm3
35202 ; SSE41-NEXT: pxor %xmm0, %xmm0
35203 ; SSE41-NEXT: psadbw %xmm3, %xmm0
35204 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
35205 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
35208 ; AVX1-LABEL: ugt_61_v2i64:
35210 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35211 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
35212 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35213 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35214 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
35215 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
35216 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35217 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35218 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
35219 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35220 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35223 ; AVX2-LABEL: ugt_61_v2i64:
35225 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35226 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
35227 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35228 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35229 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
35230 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
35231 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35232 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35233 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
35234 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35235 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35238 ; AVX512VPOPCNTDQ-LABEL: ugt_61_v2i64:
35239 ; AVX512VPOPCNTDQ: # %bb.0:
35240 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35241 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
35242 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35243 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
35244 ; AVX512VPOPCNTDQ-NEXT: retq
35246 ; AVX512VPOPCNTDQVL-LABEL: ugt_61_v2i64:
35247 ; AVX512VPOPCNTDQVL: # %bb.0:
35248 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
35249 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61]
35250 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
35251 ; AVX512VPOPCNTDQVL-NEXT: retq
35253 ; BITALG_NOVLX-LABEL: ugt_61_v2i64:
35254 ; BITALG_NOVLX: # %bb.0:
35255 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35256 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
35257 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
35258 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35259 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35260 ; BITALG_NOVLX-NEXT: vzeroupper
35261 ; BITALG_NOVLX-NEXT: retq
35263 ; BITALG-LABEL: ugt_61_v2i64:
35265 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
35266 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
35267 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35268 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [61,61]
35269 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
35270 ; BITALG-NEXT: retq
; Per-lane population count of the input vector.
35271 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
; Unsigned per-lane test: popcount > 61.
35272 %3 = icmp ugt <2 x i64> %2, <i64 61, i64 61>
; Sign-extend each i1 so a true lane becomes all-ones and a false lane zero.
35273 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test for a vector popcount threshold compare: the IR at the end of
; this function takes ctpop of the <2 x i64> argument, tests each lane with
; icmp ult against 62, and sign-extends the <2 x i1> result to <2 x i64>.
; Expected lowering, as recorded below: configurations without a vector
; popcount instruction build per-byte counts and sum them per 64-bit lane with
; (v)psadbw against a zeroed register; avx512vpopcntdq expects vpopcntq;
; avx512bitalg expects vpopcntb followed by vpsadbw. The constant 62 is
; materialized in a register and used as the greater-than side of the final
; (v)pcmpgt, i.e. the check is "62 > popcount".
; The expected-assembly lines are autogenerated (see the note on the first
; line of this file); regenerate them with the update script, do not hand-edit.
35277 define <2 x i64> @ult_62_v2i64(<2 x i64> %0) {
35278 ; SSE2-LABEL: ult_62_v2i64:
35280 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35281 ; SSE2-NEXT: psrlw $1, %xmm1
35282 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35283 ; SSE2-NEXT: psubb %xmm1, %xmm0
35284 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35285 ; SSE2-NEXT: movdqa %xmm0, %xmm2
35286 ; SSE2-NEXT: pand %xmm1, %xmm2
35287 ; SSE2-NEXT: psrlw $2, %xmm0
35288 ; SSE2-NEXT: pand %xmm1, %xmm0
35289 ; SSE2-NEXT: paddb %xmm2, %xmm0
35290 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35291 ; SSE2-NEXT: psrlw $4, %xmm1
35292 ; SSE2-NEXT: paddb %xmm0, %xmm1
35293 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35294 ; SSE2-NEXT: pxor %xmm0, %xmm0
35295 ; SSE2-NEXT: psadbw %xmm1, %xmm0
35296 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
35297 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [62,62,62,62]
35298 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
35301 ; SSE3-LABEL: ult_62_v2i64:
35303 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35304 ; SSE3-NEXT: psrlw $1, %xmm1
35305 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35306 ; SSE3-NEXT: psubb %xmm1, %xmm0
35307 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35308 ; SSE3-NEXT: movdqa %xmm0, %xmm2
35309 ; SSE3-NEXT: pand %xmm1, %xmm2
35310 ; SSE3-NEXT: psrlw $2, %xmm0
35311 ; SSE3-NEXT: pand %xmm1, %xmm0
35312 ; SSE3-NEXT: paddb %xmm2, %xmm0
35313 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35314 ; SSE3-NEXT: psrlw $4, %xmm1
35315 ; SSE3-NEXT: paddb %xmm0, %xmm1
35316 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35317 ; SSE3-NEXT: pxor %xmm0, %xmm0
35318 ; SSE3-NEXT: psadbw %xmm1, %xmm0
35319 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
35320 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [62,62,62,62]
35321 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
35324 ; SSSE3-LABEL: ult_62_v2i64:
35326 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35327 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
35328 ; SSSE3-NEXT: pand %xmm1, %xmm2
35329 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35330 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
35331 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
35332 ; SSSE3-NEXT: psrlw $4, %xmm0
35333 ; SSSE3-NEXT: pand %xmm1, %xmm0
35334 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
35335 ; SSSE3-NEXT: paddb %xmm4, %xmm3
35336 ; SSSE3-NEXT: pxor %xmm0, %xmm0
35337 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
35338 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
35339 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [62,62,62,62]
35340 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
35343 ; SSE41-LABEL: ult_62_v2i64:
35345 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35346 ; SSE41-NEXT: movdqa %xmm0, %xmm2
35347 ; SSE41-NEXT: pand %xmm1, %xmm2
35348 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35349 ; SSE41-NEXT: movdqa %xmm3, %xmm4
35350 ; SSE41-NEXT: pshufb %xmm2, %xmm4
35351 ; SSE41-NEXT: psrlw $4, %xmm0
35352 ; SSE41-NEXT: pand %xmm1, %xmm0
35353 ; SSE41-NEXT: pshufb %xmm0, %xmm3
35354 ; SSE41-NEXT: paddb %xmm4, %xmm3
35355 ; SSE41-NEXT: pxor %xmm0, %xmm0
35356 ; SSE41-NEXT: psadbw %xmm3, %xmm0
35357 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
35358 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [62,62,62,62]
35359 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
35362 ; AVX1-LABEL: ult_62_v2i64:
35364 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35365 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
35366 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35367 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35368 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
35369 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
35370 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35371 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35372 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
35373 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35374 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [62,62]
35375 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35378 ; AVX2-LABEL: ult_62_v2i64:
35380 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35381 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
35382 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35383 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35384 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
35385 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
35386 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35387 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35388 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
35389 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35390 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [62,62]
35391 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35394 ; AVX512VPOPCNTDQ-LABEL: ult_62_v2i64:
35395 ; AVX512VPOPCNTDQ: # %bb.0:
35396 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35397 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
35398 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [62,62]
35399 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35400 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
35401 ; AVX512VPOPCNTDQ-NEXT: retq
35403 ; AVX512VPOPCNTDQVL-LABEL: ult_62_v2i64:
35404 ; AVX512VPOPCNTDQVL: # %bb.0:
35405 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
35406 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62]
35407 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35408 ; AVX512VPOPCNTDQVL-NEXT: retq
35410 ; BITALG_NOVLX-LABEL: ult_62_v2i64:
35411 ; BITALG_NOVLX: # %bb.0:
35412 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35413 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
35414 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
35415 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35416 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [62,62]
35417 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35418 ; BITALG_NOVLX-NEXT: vzeroupper
35419 ; BITALG_NOVLX-NEXT: retq
35421 ; BITALG-LABEL: ult_62_v2i64:
35423 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
35424 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
35425 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35426 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62]
35427 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35428 ; BITALG-NEXT: retq
; Per-lane population count of the input vector.
35429 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
; Unsigned per-lane test: popcount < 62.
35430 %3 = icmp ult <2 x i64> %2, <i64 62, i64 62>
; Sign-extend each i1 so a true lane becomes all-ones and a false lane zero.
35431 %4 = sext <2 x i1> %3 to <2 x i64>
; Codegen test for a vector popcount threshold compare: the IR at the end of
; this function takes ctpop of the <2 x i64> argument, tests each lane with
; icmp ugt against 62, and sign-extends the <2 x i1> result to <2 x i64>.
; Expected lowering, as recorded below: configurations without a vector
; popcount instruction build per-byte counts and sum them per 64-bit lane with
; (v)psadbw against a zeroed register; avx512vpopcntdq expects vpopcntq;
; avx512bitalg expects vpopcntb followed by vpsadbw. The final compare is a
; (v)pcmpgt with the popcount as the greater-than side and the constant as the
; other operand.
; The expected-assembly lines are autogenerated (see the note on the first
; line of this file); regenerate them with the update script, do not hand-edit.
35435 define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) {
35436 ; SSE2-LABEL: ugt_62_v2i64:
35438 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35439 ; SSE2-NEXT: psrlw $1, %xmm1
35440 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35441 ; SSE2-NEXT: psubb %xmm1, %xmm0
35442 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35443 ; SSE2-NEXT: movdqa %xmm0, %xmm2
35444 ; SSE2-NEXT: pand %xmm1, %xmm2
35445 ; SSE2-NEXT: psrlw $2, %xmm0
35446 ; SSE2-NEXT: pand %xmm1, %xmm0
35447 ; SSE2-NEXT: paddb %xmm2, %xmm0
35448 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35449 ; SSE2-NEXT: psrlw $4, %xmm1
35450 ; SSE2-NEXT: paddb %xmm0, %xmm1
35451 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35452 ; SSE2-NEXT: pxor %xmm0, %xmm0
35453 ; SSE2-NEXT: psadbw %xmm1, %xmm0
35454 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
35455 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
35458 ; SSE3-LABEL: ugt_62_v2i64:
35460 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35461 ; SSE3-NEXT: psrlw $1, %xmm1
35462 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35463 ; SSE3-NEXT: psubb %xmm1, %xmm0
35464 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35465 ; SSE3-NEXT: movdqa %xmm0, %xmm2
35466 ; SSE3-NEXT: pand %xmm1, %xmm2
35467 ; SSE3-NEXT: psrlw $2, %xmm0
35468 ; SSE3-NEXT: pand %xmm1, %xmm0
35469 ; SSE3-NEXT: paddb %xmm2, %xmm0
35470 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35471 ; SSE3-NEXT: psrlw $4, %xmm1
35472 ; SSE3-NEXT: paddb %xmm0, %xmm1
35473 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35474 ; SSE3-NEXT: pxor %xmm0, %xmm0
35475 ; SSE3-NEXT: psadbw %xmm1, %xmm0
35476 ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
35477 ; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
35480 ; SSSE3-LABEL: ugt_62_v2i64:
35482 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35483 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
35484 ; SSSE3-NEXT: pand %xmm1, %xmm2
35485 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35486 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
35487 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
35488 ; SSSE3-NEXT: psrlw $4, %xmm0
35489 ; SSSE3-NEXT: pand %xmm1, %xmm0
35490 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
35491 ; SSSE3-NEXT: paddb %xmm4, %xmm3
35492 ; SSSE3-NEXT: pxor %xmm0, %xmm0
35493 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
35494 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
35495 ; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
35498 ; SSE41-LABEL: ugt_62_v2i64:
35500 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35501 ; SSE41-NEXT: movdqa %xmm0, %xmm2
35502 ; SSE41-NEXT: pand %xmm1, %xmm2
35503 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35504 ; SSE41-NEXT: movdqa %xmm3, %xmm4
35505 ; SSE41-NEXT: pshufb %xmm2, %xmm4
35506 ; SSE41-NEXT: psrlw $4, %xmm0
35507 ; SSE41-NEXT: pand %xmm1, %xmm0
35508 ; SSE41-NEXT: pshufb %xmm0, %xmm3
35509 ; SSE41-NEXT: paddb %xmm4, %xmm3
35510 ; SSE41-NEXT: pxor %xmm0, %xmm0
35511 ; SSE41-NEXT: psadbw %xmm3, %xmm0
35512 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
35513 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
35516 ; AVX1-LABEL: ugt_62_v2i64:
35518 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35519 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
35520 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35521 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35522 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
35523 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
35524 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35525 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35526 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
35527 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35528 ; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35531 ; AVX2-LABEL: ugt_62_v2i64:
35533 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35534 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
35535 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35536 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35537 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
35538 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
35539 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35540 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35541 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
35542 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35543 ; AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35546 ; AVX512VPOPCNTDQ-LABEL: ugt_62_v2i64:
35547 ; AVX512VPOPCNTDQ: # %bb.0:
35548 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35549 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
35550 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35551 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
35552 ; AVX512VPOPCNTDQ-NEXT: retq
35554 ; AVX512VPOPCNTDQVL-LABEL: ugt_62_v2i64:
35555 ; AVX512VPOPCNTDQVL: # %bb.0:
35556 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
35557 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62]
35558 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
35559 ; AVX512VPOPCNTDQVL-NEXT: retq
35561 ; BITALG_NOVLX-LABEL: ugt_62_v2i64:
35562 ; BITALG_NOVLX: # %bb.0:
35563 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35564 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
35565 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
35566 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35567 ; BITALG_NOVLX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
35568 ; BITALG_NOVLX-NEXT: vzeroupper
35569 ; BITALG_NOVLX-NEXT: retq
35571 ; BITALG-LABEL: ugt_62_v2i64:
35573 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
35574 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
35575 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35576 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [62,62]
35577 ; BITALG-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
35578 ; BITALG-NEXT: retq
; Per-lane population count of the input vector.
35579 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
; Unsigned per-lane test: popcount > 62.
35580 %3 = icmp ugt <2 x i64> %2, <i64 62, i64 62>
; Sign-extend each i1 so a true lane becomes all-ones and a false lane zero.
35581 %4 = sext <2 x i1> %3 to <2 x i64>
35585 define <2 x i64> @ult_63_v2i64(<2 x i64> %0) {
35586 ; SSE2-LABEL: ult_63_v2i64:
35588 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35589 ; SSE2-NEXT: psrlw $1, %xmm1
35590 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35591 ; SSE2-NEXT: psubb %xmm1, %xmm0
35592 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35593 ; SSE2-NEXT: movdqa %xmm0, %xmm2
35594 ; SSE2-NEXT: pand %xmm1, %xmm2
35595 ; SSE2-NEXT: psrlw $2, %xmm0
35596 ; SSE2-NEXT: pand %xmm1, %xmm0
35597 ; SSE2-NEXT: paddb %xmm2, %xmm0
35598 ; SSE2-NEXT: movdqa %xmm0, %xmm1
35599 ; SSE2-NEXT: psrlw $4, %xmm1
35600 ; SSE2-NEXT: paddb %xmm0, %xmm1
35601 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35602 ; SSE2-NEXT: pxor %xmm0, %xmm0
35603 ; SSE2-NEXT: psadbw %xmm1, %xmm0
35604 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
35605 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [63,63,63,63]
35606 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
35609 ; SSE3-LABEL: ult_63_v2i64:
35611 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35612 ; SSE3-NEXT: psrlw $1, %xmm1
35613 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35614 ; SSE3-NEXT: psubb %xmm1, %xmm0
35615 ; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
35616 ; SSE3-NEXT: movdqa %xmm0, %xmm2
35617 ; SSE3-NEXT: pand %xmm1, %xmm2
35618 ; SSE3-NEXT: psrlw $2, %xmm0
35619 ; SSE3-NEXT: pand %xmm1, %xmm0
35620 ; SSE3-NEXT: paddb %xmm2, %xmm0
35621 ; SSE3-NEXT: movdqa %xmm0, %xmm1
35622 ; SSE3-NEXT: psrlw $4, %xmm1
35623 ; SSE3-NEXT: paddb %xmm0, %xmm1
35624 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
35625 ; SSE3-NEXT: pxor %xmm0, %xmm0
35626 ; SSE3-NEXT: psadbw %xmm1, %xmm0
35627 ; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
35628 ; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [63,63,63,63]
35629 ; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
35632 ; SSSE3-LABEL: ult_63_v2i64:
35634 ; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35635 ; SSSE3-NEXT: movdqa %xmm0, %xmm2
35636 ; SSSE3-NEXT: pand %xmm1, %xmm2
35637 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35638 ; SSSE3-NEXT: movdqa %xmm3, %xmm4
35639 ; SSSE3-NEXT: pshufb %xmm2, %xmm4
35640 ; SSSE3-NEXT: psrlw $4, %xmm0
35641 ; SSSE3-NEXT: pand %xmm1, %xmm0
35642 ; SSSE3-NEXT: pshufb %xmm0, %xmm3
35643 ; SSSE3-NEXT: paddb %xmm4, %xmm3
35644 ; SSSE3-NEXT: pxor %xmm0, %xmm0
35645 ; SSSE3-NEXT: psadbw %xmm3, %xmm0
35646 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
35647 ; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [63,63,63,63]
35648 ; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
35651 ; SSE41-LABEL: ult_63_v2i64:
35653 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35654 ; SSE41-NEXT: movdqa %xmm0, %xmm2
35655 ; SSE41-NEXT: pand %xmm1, %xmm2
35656 ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35657 ; SSE41-NEXT: movdqa %xmm3, %xmm4
35658 ; SSE41-NEXT: pshufb %xmm2, %xmm4
35659 ; SSE41-NEXT: psrlw $4, %xmm0
35660 ; SSE41-NEXT: pand %xmm1, %xmm0
35661 ; SSE41-NEXT: pshufb %xmm0, %xmm3
35662 ; SSE41-NEXT: paddb %xmm4, %xmm3
35663 ; SSE41-NEXT: pxor %xmm0, %xmm0
35664 ; SSE41-NEXT: psadbw %xmm3, %xmm0
35665 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
35666 ; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [63,63,63,63]
35667 ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
35670 ; AVX1-LABEL: ult_63_v2i64:
35672 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35673 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
35674 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35675 ; AVX1-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35676 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
35677 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
35678 ; AVX1-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35679 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35680 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
35681 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35682 ; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [63,63]
35683 ; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35686 ; AVX2-LABEL: ult_63_v2i64:
35688 ; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
35689 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
35690 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
35691 ; AVX2-NEXT: vpshufb %xmm2, %xmm3, %xmm2
35692 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
35693 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
35694 ; AVX2-NEXT: vpshufb %xmm0, %xmm3, %xmm0
35695 ; AVX2-NEXT: vpaddb %xmm2, %xmm0, %xmm0
35696 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
35697 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35698 ; AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [63,63]
35699 ; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35702 ; AVX512VPOPCNTDQ-LABEL: ult_63_v2i64:
35703 ; AVX512VPOPCNTDQ: # %bb.0:
35704 ; AVX512VPOPCNTDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35705 ; AVX512VPOPCNTDQ-NEXT: vpopcntq %zmm0, %zmm0
35706 ; AVX512VPOPCNTDQ-NEXT: vpmovsxbq {{.*#+}} xmm1 = [63,63]
35707 ; AVX512VPOPCNTDQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35708 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
35709 ; AVX512VPOPCNTDQ-NEXT: retq
35711 ; AVX512VPOPCNTDQVL-LABEL: ult_63_v2i64:
35712 ; AVX512VPOPCNTDQVL: # %bb.0:
35713 ; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
35714 ; AVX512VPOPCNTDQVL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [63,63]
35715 ; AVX512VPOPCNTDQVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35716 ; AVX512VPOPCNTDQVL-NEXT: retq
35718 ; BITALG_NOVLX-LABEL: ult_63_v2i64:
35719 ; BITALG_NOVLX: # %bb.0:
35720 ; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
35721 ; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
35722 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
35723 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35724 ; BITALG_NOVLX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [63,63]
35725 ; BITALG_NOVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35726 ; BITALG_NOVLX-NEXT: vzeroupper
35727 ; BITALG_NOVLX-NEXT: retq
35729 ; BITALG-LABEL: ult_63_v2i64:
35731 ; BITALG-NEXT: vpopcntb %xmm0, %xmm0
35732 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
35733 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
35734 ; BITALG-NEXT: vpbroadcastq {{.*#+}} xmm1 = [63,63]
35735 ; BITALG-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
35736 ; BITALG-NEXT: retq
35737 %2 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
35738 %3 = icmp ult <2 x i64> %2, <i64 63, i64 63>
35739 %4 = sext <2 x i1> %3 to <2 x i64>
; Declarations of the llvm.ctpop (population count) intrinsic overloads, one
; for each of the 128-bit integer vector types <16 x i8>, <8 x i16>, <4 x i32>
; and <2 x i64>. The <2 x i64> overload is the one invoked by the
; ult_63_v2i64 test body directly above.
35743 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
35744 declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
35745 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
35746 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)