1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
4 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X86-POPCNT
5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X64-POPCNT
6 ; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
7 ; RUN: llc < %s -mtriple=i686-unknown -mattr=ssse3 | FileCheck %s --check-prefixes=X86,X86-SSSE3
; Population count of an i8 through @llvm.ctpop.i8.
; Without POPCNT, both the i686 and x86-64 expectations use a byte-wide
; mask/shift/add bit-counting sequence (masks 85, 51 and 15). With +popcnt the
; argument is zero-extended to 32 bits (movzbl) and counted by one popcntl.
9 define i8 @cnt8(i8 %x) nounwind readnone {
12 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
13 ; X86-NEXT: movl %ecx, %eax
15 ; X86-NEXT: andb $85, %al
16 ; X86-NEXT: subb %al, %cl
17 ; X86-NEXT: movl %ecx, %eax
18 ; X86-NEXT: andb $51, %al
19 ; X86-NEXT: shrb $2, %cl
20 ; X86-NEXT: andb $51, %cl
21 ; X86-NEXT: addb %al, %cl
22 ; X86-NEXT: movl %ecx, %eax
23 ; X86-NEXT: shrb $4, %al
24 ; X86-NEXT: addb %cl, %al
25 ; X86-NEXT: andb $15, %al
30 ; X64-NEXT: movl %edi, %eax
32 ; X64-NEXT: andb $85, %al
33 ; X64-NEXT: subb %al, %dil
34 ; X64-NEXT: movl %edi, %ecx
35 ; X64-NEXT: andb $51, %cl
36 ; X64-NEXT: shrb $2, %dil
37 ; X64-NEXT: andb $51, %dil
38 ; X64-NEXT: addb %dil, %cl
39 ; X64-NEXT: movl %ecx, %eax
40 ; X64-NEXT: shrb $4, %al
41 ; X64-NEXT: addb %cl, %al
42 ; X64-NEXT: andb $15, %al
45 ; X86-POPCNT-LABEL: cnt8:
46 ; X86-POPCNT: # %bb.0:
47 ; X86-POPCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
48 ; X86-POPCNT-NEXT: popcntl %eax, %eax
49 ; X86-POPCNT-NEXT: # kill: def $al killed $al killed $eax
50 ; X86-POPCNT-NEXT: retl
52 ; X64-POPCNT-LABEL: cnt8:
53 ; X64-POPCNT: # %bb.0:
54 ; X64-POPCNT-NEXT: movzbl %dil, %eax
55 ; X64-POPCNT-NEXT: popcntl %eax, %eax
56 ; X64-POPCNT-NEXT: # kill: def $al killed $al killed $eax
57 ; X64-POPCNT-NEXT: retq
58 %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
; Population count of an i16 through @llvm.ctpop.i16.
; Without POPCNT the expected code runs the mask/shift/add sequence in 32-bit
; registers (masks 0x5555, 0x3333, 0xF0F), then folds the two byte sums with
; shrl $8 / addl and keeps only the low byte via movzbl. With +popcnt the
; argument is zero-extended (movzwl) and counted by one popcntl.
62 define i16 @cnt16(i16 %x) nounwind readnone {
65 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
66 ; X86-NEXT: movl %eax, %ecx
68 ; X86-NEXT: andl $21845, %ecx # imm = 0x5555
69 ; X86-NEXT: subl %ecx, %eax
70 ; X86-NEXT: movl %eax, %ecx
71 ; X86-NEXT: andl $13107, %ecx # imm = 0x3333
72 ; X86-NEXT: shrl $2, %eax
73 ; X86-NEXT: andl $13107, %eax # imm = 0x3333
74 ; X86-NEXT: addl %ecx, %eax
75 ; X86-NEXT: movl %eax, %ecx
76 ; X86-NEXT: shrl $4, %ecx
77 ; X86-NEXT: addl %eax, %ecx
78 ; X86-NEXT: andl $3855, %ecx # imm = 0xF0F
79 ; X86-NEXT: movl %ecx, %eax
80 ; X86-NEXT: shrl $8, %eax
81 ; X86-NEXT: addl %ecx, %eax
82 ; X86-NEXT: movzbl %al, %eax
83 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
88 ; X64-NEXT: movl %edi, %eax
90 ; X64-NEXT: andl $21845, %eax # imm = 0x5555
91 ; X64-NEXT: subl %eax, %edi
92 ; X64-NEXT: movl %edi, %eax
93 ; X64-NEXT: andl $13107, %eax # imm = 0x3333
94 ; X64-NEXT: shrl $2, %edi
95 ; X64-NEXT: andl $13107, %edi # imm = 0x3333
96 ; X64-NEXT: addl %eax, %edi
97 ; X64-NEXT: movl %edi, %eax
98 ; X64-NEXT: shrl $4, %eax
99 ; X64-NEXT: addl %edi, %eax
100 ; X64-NEXT: andl $3855, %eax # imm = 0xF0F
101 ; X64-NEXT: movl %eax, %ecx
102 ; X64-NEXT: shrl $8, %ecx
103 ; X64-NEXT: addl %eax, %ecx
104 ; X64-NEXT: movzbl %cl, %eax
105 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
108 ; X86-POPCNT-LABEL: cnt16:
109 ; X86-POPCNT: # %bb.0:
110 ; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax
111 ; X86-POPCNT-NEXT: popcntl %eax, %eax
112 ; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax
113 ; X86-POPCNT-NEXT: retl
115 ; X64-POPCNT-LABEL: cnt16:
116 ; X64-POPCNT: # %bb.0:
117 ; X64-POPCNT-NEXT: movzwl %di, %eax
118 ; X64-POPCNT-NEXT: popcntl %eax, %eax
119 ; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax
120 ; X64-POPCNT-NEXT: retq
121 %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
; Population count of an i32 through @llvm.ctpop.i32.
; Without POPCNT the expected code is the 32-bit mask/shift/add sequence
; (masks 0x55555555, 0x33333333, 0xF0F0F0F) finished by a multiply with
; 0x1010101 and shrl $24 to sum the per-byte counts. With +popcnt a single
; popcntl reads the operand directly (stack slot on i686, %edi on x86-64).
125 define i32 @cnt32(i32 %x) nounwind readnone {
128 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
129 ; X86-NEXT: movl %eax, %ecx
130 ; X86-NEXT: shrl %ecx
131 ; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
132 ; X86-NEXT: subl %ecx, %eax
133 ; X86-NEXT: movl %eax, %ecx
134 ; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
135 ; X86-NEXT: shrl $2, %eax
136 ; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
137 ; X86-NEXT: addl %ecx, %eax
138 ; X86-NEXT: movl %eax, %ecx
139 ; X86-NEXT: shrl $4, %ecx
140 ; X86-NEXT: addl %eax, %ecx
141 ; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
142 ; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
143 ; X86-NEXT: shrl $24, %eax
148 ; X64-NEXT: movl %edi, %eax
149 ; X64-NEXT: shrl %eax
150 ; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
151 ; X64-NEXT: subl %eax, %edi
152 ; X64-NEXT: movl %edi, %eax
153 ; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
154 ; X64-NEXT: shrl $2, %edi
155 ; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
156 ; X64-NEXT: addl %eax, %edi
157 ; X64-NEXT: movl %edi, %eax
158 ; X64-NEXT: shrl $4, %eax
159 ; X64-NEXT: addl %edi, %eax
160 ; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
161 ; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
162 ; X64-NEXT: shrl $24, %eax
165 ; X86-POPCNT-LABEL: cnt32:
166 ; X86-POPCNT: # %bb.0:
167 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
168 ; X86-POPCNT-NEXT: retl
170 ; X64-POPCNT-LABEL: cnt32:
171 ; X64-POPCNT: # %bb.0:
172 ; X64-POPCNT-NEXT: popcntl %edi, %eax
173 ; X64-POPCNT-NEXT: retq
174 %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
; Population count of an i64 through @llvm.ctpop.i64. Expected lowerings:
;  - i686 without SSE or POPCNT: the 32-bit mask/shift/multiply sequence is run
;    on each half separately, the two counts are added, and %edx is zeroed.
;  - x86-64 without POPCNT: the same sequence with 64-bit masks loaded by
;    movabsq, finished by imulq and shrq $56.
;  - +popcnt: two popcntl plus an add on i686, a single popcntq on x86-64.
;  - i686 with sse2: vector mask/shift/add on bytes, summed with psadbw.
;  - i686 with ssse3: pshufb lookups into the nibble-count table
;    [0,1,1,2,...,4] for the low and high nibbles, summed with psadbw.
178 define i64 @cnt64(i64 %x) nounwind readnone {
179 ; X86-NOSSE-LABEL: cnt64:
180 ; X86-NOSSE: # %bb.0:
181 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
182 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
183 ; X86-NOSSE-NEXT: movl %ecx, %edx
184 ; X86-NOSSE-NEXT: shrl %edx
185 ; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
186 ; X86-NOSSE-NEXT: subl %edx, %ecx
187 ; X86-NOSSE-NEXT: movl %ecx, %edx
188 ; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
189 ; X86-NOSSE-NEXT: shrl $2, %ecx
190 ; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
191 ; X86-NOSSE-NEXT: addl %edx, %ecx
192 ; X86-NOSSE-NEXT: movl %ecx, %edx
193 ; X86-NOSSE-NEXT: shrl $4, %edx
194 ; X86-NOSSE-NEXT: addl %ecx, %edx
195 ; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
196 ; X86-NOSSE-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101
197 ; X86-NOSSE-NEXT: shrl $24, %ecx
198 ; X86-NOSSE-NEXT: movl %eax, %edx
199 ; X86-NOSSE-NEXT: shrl %edx
200 ; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
201 ; X86-NOSSE-NEXT: subl %edx, %eax
202 ; X86-NOSSE-NEXT: movl %eax, %edx
203 ; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
204 ; X86-NOSSE-NEXT: shrl $2, %eax
205 ; X86-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333
206 ; X86-NOSSE-NEXT: addl %edx, %eax
207 ; X86-NOSSE-NEXT: movl %eax, %edx
208 ; X86-NOSSE-NEXT: shrl $4, %edx
209 ; X86-NOSSE-NEXT: addl %eax, %edx
210 ; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
211 ; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
212 ; X86-NOSSE-NEXT: shrl $24, %eax
213 ; X86-NOSSE-NEXT: addl %ecx, %eax
214 ; X86-NOSSE-NEXT: xorl %edx, %edx
215 ; X86-NOSSE-NEXT: retl
219 ; X64-NEXT: movq %rdi, %rax
220 ; X64-NEXT: shrq %rax
221 ; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
222 ; X64-NEXT: andq %rax, %rcx
223 ; X64-NEXT: subq %rcx, %rdi
224 ; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
225 ; X64-NEXT: movq %rdi, %rcx
226 ; X64-NEXT: andq %rax, %rcx
227 ; X64-NEXT: shrq $2, %rdi
228 ; X64-NEXT: andq %rdi, %rax
229 ; X64-NEXT: addq %rcx, %rax
230 ; X64-NEXT: movq %rax, %rcx
231 ; X64-NEXT: shrq $4, %rcx
232 ; X64-NEXT: addq %rax, %rcx
233 ; X64-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
234 ; X64-NEXT: andq %rcx, %rdx
235 ; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
236 ; X64-NEXT: imulq %rdx, %rax
237 ; X64-NEXT: shrq $56, %rax
240 ; X86-POPCNT-LABEL: cnt64:
241 ; X86-POPCNT: # %bb.0:
242 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
243 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
244 ; X86-POPCNT-NEXT: addl %ecx, %eax
245 ; X86-POPCNT-NEXT: xorl %edx, %edx
246 ; X86-POPCNT-NEXT: retl
248 ; X64-POPCNT-LABEL: cnt64:
249 ; X64-POPCNT: # %bb.0:
250 ; X64-POPCNT-NEXT: popcntq %rdi, %rax
251 ; X64-POPCNT-NEXT: retq
253 ; X86-SSE2-LABEL: cnt64:
255 ; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
256 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
257 ; X86-SSE2-NEXT: psrlw $1, %xmm1
258 ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
259 ; X86-SSE2-NEXT: psubb %xmm1, %xmm0
260 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
261 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
262 ; X86-SSE2-NEXT: pand %xmm1, %xmm2
263 ; X86-SSE2-NEXT: psrlw $2, %xmm0
264 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
265 ; X86-SSE2-NEXT: paddb %xmm2, %xmm0
266 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
267 ; X86-SSE2-NEXT: psrlw $4, %xmm1
268 ; X86-SSE2-NEXT: paddb %xmm0, %xmm1
269 ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
270 ; X86-SSE2-NEXT: pxor %xmm0, %xmm0
271 ; X86-SSE2-NEXT: psadbw %xmm1, %xmm0
272 ; X86-SSE2-NEXT: movd %xmm0, %eax
273 ; X86-SSE2-NEXT: xorl %edx, %edx
274 ; X86-SSE2-NEXT: retl
276 ; X86-SSSE3-LABEL: cnt64:
277 ; X86-SSSE3: # %bb.0:
278 ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
279 ; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
280 ; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2
281 ; X86-SSSE3-NEXT: pand %xmm0, %xmm2
282 ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
283 ; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4
284 ; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4
285 ; X86-SSSE3-NEXT: psrlw $4, %xmm1
286 ; X86-SSSE3-NEXT: pand %xmm0, %xmm1
287 ; X86-SSSE3-NEXT: pshufb %xmm1, %xmm3
288 ; X86-SSSE3-NEXT: paddb %xmm4, %xmm3
289 ; X86-SSSE3-NEXT: pxor %xmm0, %xmm0
290 ; X86-SSSE3-NEXT: psadbw %xmm3, %xmm0
291 ; X86-SSSE3-NEXT: movd %xmm0, %eax
292 ; X86-SSSE3-NEXT: xorl %edx, %edx
293 ; X86-SSSE3-NEXT: retl
294 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
; Population count of an i128 through @llvm.ctpop.i128. Expected lowerings:
;  - i686 without SSE or POPCNT: four independent 32-bit mask/shift/multiply
;    counts summed pairwise; the sum is stored at (%eax) and the dwords at
;    offsets 4, 8 and 12 are set to 0 before retl $4.
;  - x86-64 without POPCNT: two 64-bit counts sharing the movabsq mask
;    registers, added into %rax with %edx zeroed.
;  - +popcnt: four popcntl (i686) or two popcntq (x86-64) plus adds.
;  - i686 with sse2 / ssse3: two 64-bit vector counts (bit-twiddle or pshufb
;    table lookup, each reduced with psadbw) added in scalar registers, then
;    stored through %eax like the scalar i686 case.
; NOTE(review): the i686 store pattern looks like a result returned through a
; hidden pointer argument - confirm against the calling convention docs.
298 define i128 @cnt128(i128 %x) nounwind readnone {
299 ; X86-NOSSE-LABEL: cnt128:
300 ; X86-NOSSE: # %bb.0:
301 ; X86-NOSSE-NEXT: pushl %ebx
302 ; X86-NOSSE-NEXT: pushl %edi
303 ; X86-NOSSE-NEXT: pushl %esi
304 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
305 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
306 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
307 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
308 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi
309 ; X86-NOSSE-NEXT: movl %edi, %ebx
310 ; X86-NOSSE-NEXT: shrl %ebx
311 ; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
312 ; X86-NOSSE-NEXT: subl %ebx, %edi
313 ; X86-NOSSE-NEXT: movl %edi, %ebx
314 ; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
315 ; X86-NOSSE-NEXT: shrl $2, %edi
316 ; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
317 ; X86-NOSSE-NEXT: addl %ebx, %edi
318 ; X86-NOSSE-NEXT: movl %edi, %ebx
319 ; X86-NOSSE-NEXT: shrl $4, %ebx
320 ; X86-NOSSE-NEXT: addl %edi, %ebx
321 ; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
322 ; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101
323 ; X86-NOSSE-NEXT: shrl $24, %edi
324 ; X86-NOSSE-NEXT: movl %esi, %ebx
325 ; X86-NOSSE-NEXT: shrl %ebx
326 ; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
327 ; X86-NOSSE-NEXT: subl %ebx, %esi
328 ; X86-NOSSE-NEXT: movl %esi, %ebx
329 ; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
330 ; X86-NOSSE-NEXT: shrl $2, %esi
331 ; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
332 ; X86-NOSSE-NEXT: addl %ebx, %esi
333 ; X86-NOSSE-NEXT: movl %esi, %ebx
334 ; X86-NOSSE-NEXT: shrl $4, %ebx
335 ; X86-NOSSE-NEXT: addl %esi, %ebx
336 ; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
337 ; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101
338 ; X86-NOSSE-NEXT: shrl $24, %esi
339 ; X86-NOSSE-NEXT: addl %edi, %esi
340 ; X86-NOSSE-NEXT: movl %edx, %edi
341 ; X86-NOSSE-NEXT: shrl %edi
342 ; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
343 ; X86-NOSSE-NEXT: subl %edi, %edx
344 ; X86-NOSSE-NEXT: movl %edx, %edi
345 ; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
346 ; X86-NOSSE-NEXT: shrl $2, %edx
347 ; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
348 ; X86-NOSSE-NEXT: addl %edi, %edx
349 ; X86-NOSSE-NEXT: movl %edx, %edi
350 ; X86-NOSSE-NEXT: shrl $4, %edi
351 ; X86-NOSSE-NEXT: addl %edx, %edi
352 ; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
353 ; X86-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101
354 ; X86-NOSSE-NEXT: shrl $24, %edx
355 ; X86-NOSSE-NEXT: movl %ecx, %edi
356 ; X86-NOSSE-NEXT: shrl %edi
357 ; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
358 ; X86-NOSSE-NEXT: subl %edi, %ecx
359 ; X86-NOSSE-NEXT: movl %ecx, %edi
360 ; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
361 ; X86-NOSSE-NEXT: shrl $2, %ecx
362 ; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
363 ; X86-NOSSE-NEXT: addl %edi, %ecx
364 ; X86-NOSSE-NEXT: movl %ecx, %edi
365 ; X86-NOSSE-NEXT: shrl $4, %edi
366 ; X86-NOSSE-NEXT: addl %ecx, %edi
367 ; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
368 ; X86-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101
369 ; X86-NOSSE-NEXT: shrl $24, %ecx
370 ; X86-NOSSE-NEXT: addl %edx, %ecx
371 ; X86-NOSSE-NEXT: addl %esi, %ecx
372 ; X86-NOSSE-NEXT: movl %ecx, (%eax)
373 ; X86-NOSSE-NEXT: movl $0, 12(%eax)
374 ; X86-NOSSE-NEXT: movl $0, 8(%eax)
375 ; X86-NOSSE-NEXT: movl $0, 4(%eax)
376 ; X86-NOSSE-NEXT: popl %esi
377 ; X86-NOSSE-NEXT: popl %edi
378 ; X86-NOSSE-NEXT: popl %ebx
379 ; X86-NOSSE-NEXT: retl $4
383 ; X64-NEXT: movq %rsi, %rax
384 ; X64-NEXT: shrq %rax
385 ; X64-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
386 ; X64-NEXT: andq %r8, %rax
387 ; X64-NEXT: subq %rax, %rsi
388 ; X64-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
389 ; X64-NEXT: movq %rsi, %rax
390 ; X64-NEXT: andq %rcx, %rax
391 ; X64-NEXT: shrq $2, %rsi
392 ; X64-NEXT: andq %rcx, %rsi
393 ; X64-NEXT: addq %rsi, %rax
394 ; X64-NEXT: movq %rax, %rdx
395 ; X64-NEXT: shrq $4, %rdx
396 ; X64-NEXT: addq %rax, %rdx
397 ; X64-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
398 ; X64-NEXT: andq %rsi, %rdx
399 ; X64-NEXT: movabsq $72340172838076673, %r9 # imm = 0x101010101010101
400 ; X64-NEXT: imulq %r9, %rdx
401 ; X64-NEXT: shrq $56, %rdx
402 ; X64-NEXT: movq %rdi, %rax
403 ; X64-NEXT: shrq %rax
404 ; X64-NEXT: andq %r8, %rax
405 ; X64-NEXT: subq %rax, %rdi
406 ; X64-NEXT: movq %rdi, %rax
407 ; X64-NEXT: andq %rcx, %rax
408 ; X64-NEXT: shrq $2, %rdi
409 ; X64-NEXT: andq %rdi, %rcx
410 ; X64-NEXT: addq %rax, %rcx
411 ; X64-NEXT: movq %rcx, %rax
412 ; X64-NEXT: shrq $4, %rax
413 ; X64-NEXT: addq %rcx, %rax
414 ; X64-NEXT: andq %rsi, %rax
415 ; X64-NEXT: imulq %r9, %rax
416 ; X64-NEXT: shrq $56, %rax
417 ; X64-NEXT: addq %rdx, %rax
418 ; X64-NEXT: xorl %edx, %edx
421 ; X86-POPCNT-LABEL: cnt128:
422 ; X86-POPCNT: # %bb.0:
423 ; X86-POPCNT-NEXT: pushl %esi
424 ; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
425 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
426 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
427 ; X86-POPCNT-NEXT: addl %ecx, %edx
428 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
429 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
430 ; X86-POPCNT-NEXT: addl %ecx, %esi
431 ; X86-POPCNT-NEXT: addl %edx, %esi
432 ; X86-POPCNT-NEXT: movl %esi, (%eax)
433 ; X86-POPCNT-NEXT: movl $0, 12(%eax)
434 ; X86-POPCNT-NEXT: movl $0, 8(%eax)
435 ; X86-POPCNT-NEXT: movl $0, 4(%eax)
436 ; X86-POPCNT-NEXT: popl %esi
437 ; X86-POPCNT-NEXT: retl $4
439 ; X64-POPCNT-LABEL: cnt128:
440 ; X64-POPCNT: # %bb.0:
441 ; X64-POPCNT-NEXT: popcntq %rsi, %rcx
442 ; X64-POPCNT-NEXT: popcntq %rdi, %rax
443 ; X64-POPCNT-NEXT: addq %rcx, %rax
444 ; X64-POPCNT-NEXT: xorl %edx, %edx
445 ; X64-POPCNT-NEXT: retq
447 ; X86-SSE2-LABEL: cnt128:
449 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
450 ; X86-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
451 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
452 ; X86-SSE2-NEXT: psrlw $1, %xmm0
453 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
454 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
455 ; X86-SSE2-NEXT: psubb %xmm0, %xmm2
456 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
457 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
458 ; X86-SSE2-NEXT: pand %xmm0, %xmm3
459 ; X86-SSE2-NEXT: psrlw $2, %xmm2
460 ; X86-SSE2-NEXT: pand %xmm0, %xmm2
461 ; X86-SSE2-NEXT: paddb %xmm3, %xmm2
462 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
463 ; X86-SSE2-NEXT: psrlw $4, %xmm4
464 ; X86-SSE2-NEXT: paddb %xmm2, %xmm4
465 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
466 ; X86-SSE2-NEXT: pand %xmm2, %xmm4
467 ; X86-SSE2-NEXT: pxor %xmm3, %xmm3
468 ; X86-SSE2-NEXT: psadbw %xmm3, %xmm4
469 ; X86-SSE2-NEXT: movd %xmm4, %ecx
470 ; X86-SSE2-NEXT: movq {{.*#+}} xmm4 = mem[0],zero
471 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm5
472 ; X86-SSE2-NEXT: psrlw $1, %xmm5
473 ; X86-SSE2-NEXT: pand %xmm1, %xmm5
474 ; X86-SSE2-NEXT: psubb %xmm5, %xmm4
475 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
476 ; X86-SSE2-NEXT: pand %xmm0, %xmm1
477 ; X86-SSE2-NEXT: psrlw $2, %xmm4
478 ; X86-SSE2-NEXT: pand %xmm0, %xmm4
479 ; X86-SSE2-NEXT: paddb %xmm1, %xmm4
480 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm0
481 ; X86-SSE2-NEXT: psrlw $4, %xmm0
482 ; X86-SSE2-NEXT: paddb %xmm4, %xmm0
483 ; X86-SSE2-NEXT: pand %xmm2, %xmm0
484 ; X86-SSE2-NEXT: psadbw %xmm3, %xmm0
485 ; X86-SSE2-NEXT: movd %xmm0, %edx
486 ; X86-SSE2-NEXT: addl %ecx, %edx
487 ; X86-SSE2-NEXT: movl %edx, (%eax)
488 ; X86-SSE2-NEXT: movl $0, 12(%eax)
489 ; X86-SSE2-NEXT: movl $0, 8(%eax)
490 ; X86-SSE2-NEXT: movl $0, 4(%eax)
491 ; X86-SSE2-NEXT: retl $4
493 ; X86-SSSE3-LABEL: cnt128:
494 ; X86-SSSE3: # %bb.0:
495 ; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
496 ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
497 ; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
498 ; X86-SSSE3-NEXT: movdqa %xmm2, %xmm3
499 ; X86-SSSE3-NEXT: pand %xmm1, %xmm3
500 ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
501 ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm4
502 ; X86-SSSE3-NEXT: pshufb %xmm3, %xmm4
503 ; X86-SSSE3-NEXT: psrlw $4, %xmm2
504 ; X86-SSSE3-NEXT: pand %xmm1, %xmm2
505 ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm3
506 ; X86-SSSE3-NEXT: pshufb %xmm2, %xmm3
507 ; X86-SSSE3-NEXT: paddb %xmm4, %xmm3
508 ; X86-SSSE3-NEXT: pxor %xmm2, %xmm2
509 ; X86-SSSE3-NEXT: psadbw %xmm2, %xmm3
510 ; X86-SSSE3-NEXT: movd %xmm3, %ecx
511 ; X86-SSSE3-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
512 ; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4
513 ; X86-SSSE3-NEXT: pand %xmm1, %xmm4
514 ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm5
515 ; X86-SSSE3-NEXT: pshufb %xmm4, %xmm5
516 ; X86-SSSE3-NEXT: psrlw $4, %xmm3
517 ; X86-SSSE3-NEXT: pand %xmm1, %xmm3
518 ; X86-SSSE3-NEXT: pshufb %xmm3, %xmm0
519 ; X86-SSSE3-NEXT: paddb %xmm5, %xmm0
520 ; X86-SSSE3-NEXT: psadbw %xmm2, %xmm0
521 ; X86-SSSE3-NEXT: movd %xmm0, %edx
522 ; X86-SSSE3-NEXT: addl %ecx, %edx
523 ; X86-SSSE3-NEXT: movl %edx, (%eax)
524 ; X86-SSSE3-NEXT: movl $0, 12(%eax)
525 ; X86-SSSE3-NEXT: movl $0, 8(%eax)
526 ; X86-SSSE3-NEXT: movl $0, 4(%eax)
527 ; X86-SSSE3-NEXT: retl $4
528 %cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
; Population count of an i64 in a function marked noimplicitfloat.
; The non-POPCNT i686 expectations are written once under the shared X86
; prefix, which the run lines attach to the plain, sse2 and ssse3
; configurations alike. All three are therefore expected to emit the scalar
; two-half mask/shift/multiply sequence, with no xmm registers in the checks.
; The x86-64 and +popcnt expectations match the scalar forms used for an
; ordinary i64 ctpop.
532 define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat {
533 ; X86-LABEL: cnt64_noimplicitfloat:
535 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
536 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
537 ; X86-NEXT: movl %ecx, %edx
538 ; X86-NEXT: shrl %edx
539 ; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555
540 ; X86-NEXT: subl %edx, %ecx
541 ; X86-NEXT: movl %ecx, %edx
542 ; X86-NEXT: andl $858993459, %edx # imm = 0x33333333
543 ; X86-NEXT: shrl $2, %ecx
544 ; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
545 ; X86-NEXT: addl %edx, %ecx
546 ; X86-NEXT: movl %ecx, %edx
547 ; X86-NEXT: shrl $4, %edx
548 ; X86-NEXT: addl %ecx, %edx
549 ; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
550 ; X86-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101
551 ; X86-NEXT: shrl $24, %ecx
552 ; X86-NEXT: movl %eax, %edx
553 ; X86-NEXT: shrl %edx
554 ; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555
555 ; X86-NEXT: subl %edx, %eax
556 ; X86-NEXT: movl %eax, %edx
557 ; X86-NEXT: andl $858993459, %edx # imm = 0x33333333
558 ; X86-NEXT: shrl $2, %eax
559 ; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
560 ; X86-NEXT: addl %edx, %eax
561 ; X86-NEXT: movl %eax, %edx
562 ; X86-NEXT: shrl $4, %edx
563 ; X86-NEXT: addl %eax, %edx
564 ; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
565 ; X86-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
566 ; X86-NEXT: shrl $24, %eax
567 ; X86-NEXT: addl %ecx, %eax
568 ; X86-NEXT: xorl %edx, %edx
571 ; X64-LABEL: cnt64_noimplicitfloat:
573 ; X64-NEXT: movq %rdi, %rax
574 ; X64-NEXT: shrq %rax
575 ; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
576 ; X64-NEXT: andq %rax, %rcx
577 ; X64-NEXT: subq %rcx, %rdi
578 ; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
579 ; X64-NEXT: movq %rdi, %rcx
580 ; X64-NEXT: andq %rax, %rcx
581 ; X64-NEXT: shrq $2, %rdi
582 ; X64-NEXT: andq %rdi, %rax
583 ; X64-NEXT: addq %rcx, %rax
584 ; X64-NEXT: movq %rax, %rcx
585 ; X64-NEXT: shrq $4, %rcx
586 ; X64-NEXT: addq %rax, %rcx
587 ; X64-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
588 ; X64-NEXT: andq %rcx, %rdx
589 ; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
590 ; X64-NEXT: imulq %rdx, %rax
591 ; X64-NEXT: shrq $56, %rax
594 ; X86-POPCNT-LABEL: cnt64_noimplicitfloat:
595 ; X86-POPCNT: # %bb.0:
596 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
597 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
598 ; X86-POPCNT-NEXT: addl %ecx, %eax
599 ; X86-POPCNT-NEXT: xorl %edx, %edx
600 ; X86-POPCNT-NEXT: retl
602 ; X64-POPCNT-LABEL: cnt64_noimplicitfloat:
603 ; X64-POPCNT: # %bb.0:
604 ; X64-POPCNT-NEXT: popcntq %rdi, %rax
605 ; X64-POPCNT-NEXT: retq
606 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
; Population count of an i32 in a function marked optsize.
; In the non-POPCNT expectations the 0x33333333 mask is loaded into a register
; once (movl $858993459) and reused by both andl instructions, rather than
; being encoded as an immediate in each of them. The 0x55555555 and 0xF0F0F0F
; masks, each used once, remain immediates. With +popcnt a single popcntl is
; expected.
610 define i32 @cnt32_optsize(i32 %x) nounwind readnone optsize {
611 ; X86-LABEL: cnt32_optsize:
613 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
614 ; X86-NEXT: movl %eax, %ecx
615 ; X86-NEXT: shrl %ecx
616 ; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
617 ; X86-NEXT: subl %ecx, %eax
618 ; X86-NEXT: movl $858993459, %ecx # imm = 0x33333333
619 ; X86-NEXT: movl %eax, %edx
620 ; X86-NEXT: andl %ecx, %edx
621 ; X86-NEXT: shrl $2, %eax
622 ; X86-NEXT: andl %ecx, %eax
623 ; X86-NEXT: addl %edx, %eax
624 ; X86-NEXT: movl %eax, %ecx
625 ; X86-NEXT: shrl $4, %ecx
626 ; X86-NEXT: addl %eax, %ecx
627 ; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
628 ; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
629 ; X86-NEXT: shrl $24, %eax
632 ; X64-LABEL: cnt32_optsize:
634 ; X64-NEXT: movl %edi, %eax
635 ; X64-NEXT: shrl %eax
636 ; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
637 ; X64-NEXT: subl %eax, %edi
638 ; X64-NEXT: movl $858993459, %eax # imm = 0x33333333
639 ; X64-NEXT: movl %edi, %ecx
640 ; X64-NEXT: andl %eax, %ecx
641 ; X64-NEXT: shrl $2, %edi
642 ; X64-NEXT: andl %eax, %edi
643 ; X64-NEXT: addl %ecx, %edi
644 ; X64-NEXT: movl %edi, %eax
645 ; X64-NEXT: shrl $4, %eax
646 ; X64-NEXT: addl %edi, %eax
647 ; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
648 ; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
649 ; X64-NEXT: shrl $24, %eax
652 ; X86-POPCNT-LABEL: cnt32_optsize:
653 ; X86-POPCNT: # %bb.0:
654 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
655 ; X86-POPCNT-NEXT: retl
657 ; X64-POPCNT-LABEL: cnt32_optsize:
658 ; X64-POPCNT: # %bb.0:
659 ; X64-POPCNT-NEXT: popcntl %edi, %eax
660 ; X64-POPCNT-NEXT: retq
661 %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
; Population count of an i64 in a function marked optsize. Expected lowerings:
;  - i686 without SSE or POPCNT: the masks 0x55555555, 0x33333333 and
;    0xF0F0F0F are each loaded into a register once and shared by the counts
;    of both 32-bit halves, so %ebx, %edi and %esi are pushed and popped
;    around the body.
;  - x86-64 without POPCNT: 64-bit masks via movabsq, then imulq and shrq $56.
;  - +popcnt: two popcntl plus an add on i686, a single popcntq on x86-64.
;  - i686 with sse2: vector mask/shift/add on bytes, summed with psadbw.
;  - i686 with ssse3: pshufb nibble-count table lookups, summed with psadbw.
665 define i64 @cnt64_optsize(i64 %x) nounwind readnone optsize {
666 ; X86-NOSSE-LABEL: cnt64_optsize:
667 ; X86-NOSSE: # %bb.0:
668 ; X86-NOSSE-NEXT: pushl %ebx
669 ; X86-NOSSE-NEXT: pushl %edi
670 ; X86-NOSSE-NEXT: pushl %esi
671 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
672 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
673 ; X86-NOSSE-NEXT: movl %esi, %ecx
674 ; X86-NOSSE-NEXT: shrl %ecx
675 ; X86-NOSSE-NEXT: movl $1431655765, %edx # imm = 0x55555555
676 ; X86-NOSSE-NEXT: andl %edx, %ecx
677 ; X86-NOSSE-NEXT: subl %ecx, %esi
678 ; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333
679 ; X86-NOSSE-NEXT: movl %esi, %edi
680 ; X86-NOSSE-NEXT: andl %ecx, %edi
681 ; X86-NOSSE-NEXT: shrl $2, %esi
682 ; X86-NOSSE-NEXT: andl %ecx, %esi
683 ; X86-NOSSE-NEXT: addl %edi, %esi
684 ; X86-NOSSE-NEXT: movl %esi, %ebx
685 ; X86-NOSSE-NEXT: shrl $4, %ebx
686 ; X86-NOSSE-NEXT: addl %esi, %ebx
687 ; X86-NOSSE-NEXT: movl $252645135, %edi # imm = 0xF0F0F0F
688 ; X86-NOSSE-NEXT: andl %edi, %ebx
689 ; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101
690 ; X86-NOSSE-NEXT: shrl $24, %esi
691 ; X86-NOSSE-NEXT: movl %eax, %ebx
692 ; X86-NOSSE-NEXT: shrl %ebx
693 ; X86-NOSSE-NEXT: andl %edx, %ebx
694 ; X86-NOSSE-NEXT: subl %ebx, %eax
695 ; X86-NOSSE-NEXT: movl %eax, %edx
696 ; X86-NOSSE-NEXT: andl %ecx, %edx
697 ; X86-NOSSE-NEXT: shrl $2, %eax
698 ; X86-NOSSE-NEXT: andl %ecx, %eax
699 ; X86-NOSSE-NEXT: addl %edx, %eax
700 ; X86-NOSSE-NEXT: movl %eax, %ecx
701 ; X86-NOSSE-NEXT: shrl $4, %ecx
702 ; X86-NOSSE-NEXT: addl %eax, %ecx
703 ; X86-NOSSE-NEXT: andl %edi, %ecx
704 ; X86-NOSSE-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
705 ; X86-NOSSE-NEXT: shrl $24, %eax
706 ; X86-NOSSE-NEXT: addl %esi, %eax
707 ; X86-NOSSE-NEXT: xorl %edx, %edx
708 ; X86-NOSSE-NEXT: popl %esi
709 ; X86-NOSSE-NEXT: popl %edi
710 ; X86-NOSSE-NEXT: popl %ebx
711 ; X86-NOSSE-NEXT: retl
713 ; X64-LABEL: cnt64_optsize:
715 ; X64-NEXT: movq %rdi, %rax
716 ; X64-NEXT: shrq %rax
717 ; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
718 ; X64-NEXT: andq %rax, %rcx
719 ; X64-NEXT: subq %rcx, %rdi
720 ; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
721 ; X64-NEXT: movq %rdi, %rcx
722 ; X64-NEXT: andq %rax, %rcx
723 ; X64-NEXT: shrq $2, %rdi
724 ; X64-NEXT: andq %rdi, %rax
725 ; X64-NEXT: addq %rcx, %rax
726 ; X64-NEXT: movq %rax, %rcx
727 ; X64-NEXT: shrq $4, %rcx
728 ; X64-NEXT: addq %rax, %rcx
729 ; X64-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
730 ; X64-NEXT: andq %rcx, %rdx
731 ; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
732 ; X64-NEXT: imulq %rdx, %rax
733 ; X64-NEXT: shrq $56, %rax
736 ; X86-POPCNT-LABEL: cnt64_optsize:
737 ; X86-POPCNT: # %bb.0:
738 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
739 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
740 ; X86-POPCNT-NEXT: addl %ecx, %eax
741 ; X86-POPCNT-NEXT: xorl %edx, %edx
742 ; X86-POPCNT-NEXT: retl
744 ; X64-POPCNT-LABEL: cnt64_optsize:
745 ; X64-POPCNT: # %bb.0:
746 ; X64-POPCNT-NEXT: popcntq %rdi, %rax
747 ; X64-POPCNT-NEXT: retq
749 ; X86-SSE2-LABEL: cnt64_optsize:
751 ; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
752 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
753 ; X86-SSE2-NEXT: psrlw $1, %xmm1
754 ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
755 ; X86-SSE2-NEXT: psubb %xmm1, %xmm0
756 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
757 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
758 ; X86-SSE2-NEXT: pand %xmm1, %xmm2
759 ; X86-SSE2-NEXT: psrlw $2, %xmm0
760 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
761 ; X86-SSE2-NEXT: paddb %xmm2, %xmm0
762 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
763 ; X86-SSE2-NEXT: psrlw $4, %xmm1
764 ; X86-SSE2-NEXT: paddb %xmm0, %xmm1
765 ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
766 ; X86-SSE2-NEXT: pxor %xmm0, %xmm0
767 ; X86-SSE2-NEXT: psadbw %xmm1, %xmm0
768 ; X86-SSE2-NEXT: movd %xmm0, %eax
769 ; X86-SSE2-NEXT: xorl %edx, %edx
770 ; X86-SSE2-NEXT: retl
772 ; X86-SSSE3-LABEL: cnt64_optsize:
773 ; X86-SSSE3: # %bb.0:
774 ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
775 ; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
776 ; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2
777 ; X86-SSSE3-NEXT: pand %xmm0, %xmm2
778 ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
779 ; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4
780 ; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4
781 ; X86-SSSE3-NEXT: psrlw $4, %xmm1
782 ; X86-SSSE3-NEXT: pand %xmm0, %xmm1
783 ; X86-SSSE3-NEXT: pshufb %xmm1, %xmm3
784 ; X86-SSSE3-NEXT: paddb %xmm4, %xmm3
785 ; X86-SSSE3-NEXT: pxor %xmm0, %xmm0
786 ; X86-SSSE3-NEXT: psadbw %xmm3, %xmm0
787 ; X86-SSSE3-NEXT: movd %xmm0, %eax
788 ; X86-SSSE3-NEXT: xorl %edx, %edx
789 ; X86-SSSE3-NEXT: retl
790 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
794 define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
795 ; X86-NOSSE-LABEL: cnt128_optsize:
796 ; X86-NOSSE: # %bb.0:
797 ; X86-NOSSE-NEXT: pushl %ebp
798 ; X86-NOSSE-NEXT: pushl %ebx
799 ; X86-NOSSE-NEXT: pushl %edi
800 ; X86-NOSSE-NEXT: pushl %esi
801 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
802 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
803 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
804 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
805 ; X86-NOSSE-NEXT: movl %ebx, %ecx
806 ; X86-NOSSE-NEXT: shrl %ecx
807 ; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
808 ; X86-NOSSE-NEXT: andl %edi, %ecx
809 ; X86-NOSSE-NEXT: subl %ecx, %ebx
810 ; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333
811 ; X86-NOSSE-NEXT: movl %ebx, %ebp
812 ; X86-NOSSE-NEXT: andl %ecx, %ebp
813 ; X86-NOSSE-NEXT: shrl $2, %ebx
814 ; X86-NOSSE-NEXT: andl %ecx, %ebx
815 ; X86-NOSSE-NEXT: addl %ebp, %ebx
816 ; X86-NOSSE-NEXT: movl %ebx, %ebp
817 ; X86-NOSSE-NEXT: shrl $4, %ebp
818 ; X86-NOSSE-NEXT: addl %ebx, %ebp
819 ; X86-NOSSE-NEXT: movl %eax, %ebx
820 ; X86-NOSSE-NEXT: shrl %ebx
821 ; X86-NOSSE-NEXT: andl %edi, %ebx
822 ; X86-NOSSE-NEXT: subl %ebx, %eax
823 ; X86-NOSSE-NEXT: movl %eax, %ebx
824 ; X86-NOSSE-NEXT: andl %ecx, %ebx
825 ; X86-NOSSE-NEXT: shrl $2, %eax
826 ; X86-NOSSE-NEXT: andl %ecx, %eax
827 ; X86-NOSSE-NEXT: addl %ebx, %eax
828 ; X86-NOSSE-NEXT: movl %eax, %edi
829 ; X86-NOSSE-NEXT: shrl $4, %edi
830 ; X86-NOSSE-NEXT: addl %eax, %edi
831 ; X86-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F
832 ; X86-NOSSE-NEXT: andl %ebx, %ebp
833 ; X86-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101
834 ; X86-NOSSE-NEXT: shrl $24, %eax
835 ; X86-NOSSE-NEXT: andl %ebx, %edi
836 ; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
837 ; X86-NOSSE-NEXT: shrl $24, %edi
838 ; X86-NOSSE-NEXT: addl %eax, %edi
839 ; X86-NOSSE-NEXT: movl %esi, %eax
840 ; X86-NOSSE-NEXT: shrl %eax
841 ; X86-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555
842 ; X86-NOSSE-NEXT: andl %ebp, %eax
843 ; X86-NOSSE-NEXT: subl %eax, %esi
844 ; X86-NOSSE-NEXT: movl %esi, %eax
845 ; X86-NOSSE-NEXT: andl %ecx, %eax
846 ; X86-NOSSE-NEXT: shrl $2, %esi
847 ; X86-NOSSE-NEXT: andl %ecx, %esi
848 ; X86-NOSSE-NEXT: addl %eax, %esi
849 ; X86-NOSSE-NEXT: movl %esi, %ebp
850 ; X86-NOSSE-NEXT: shrl $4, %ebp
851 ; X86-NOSSE-NEXT: addl %esi, %ebp
852 ; X86-NOSSE-NEXT: movl %edx, %eax
853 ; X86-NOSSE-NEXT: shrl %eax
854 ; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555
855 ; X86-NOSSE-NEXT: andl %esi, %eax
856 ; X86-NOSSE-NEXT: subl %eax, %edx
857 ; X86-NOSSE-NEXT: movl %edx, %eax
858 ; X86-NOSSE-NEXT: andl %ecx, %eax
859 ; X86-NOSSE-NEXT: shrl $2, %edx
860 ; X86-NOSSE-NEXT: andl %ecx, %edx
861 ; X86-NOSSE-NEXT: addl %eax, %edx
862 ; X86-NOSSE-NEXT: movl %edx, %eax
863 ; X86-NOSSE-NEXT: shrl $4, %eax
864 ; X86-NOSSE-NEXT: addl %edx, %eax
865 ; X86-NOSSE-NEXT: andl %ebx, %ebp
866 ; X86-NOSSE-NEXT: andl %ebx, %eax
867 ; X86-NOSSE-NEXT: imull $16843009, %ebp, %ecx # imm = 0x1010101
868 ; X86-NOSSE-NEXT: shrl $24, %ecx
869 ; X86-NOSSE-NEXT: imull $16843009, %eax, %edx # imm = 0x1010101
870 ; X86-NOSSE-NEXT: shrl $24, %edx
871 ; X86-NOSSE-NEXT: addl %ecx, %edx
872 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
873 ; X86-NOSSE-NEXT: addl %edi, %edx
874 ; X86-NOSSE-NEXT: xorl %ecx, %ecx
875 ; X86-NOSSE-NEXT: movl %ecx, 12(%eax)
876 ; X86-NOSSE-NEXT: movl %ecx, 8(%eax)
877 ; X86-NOSSE-NEXT: movl %ecx, 4(%eax)
878 ; X86-NOSSE-NEXT: movl %edx, (%eax)
879 ; X86-NOSSE-NEXT: popl %esi
880 ; X86-NOSSE-NEXT: popl %edi
881 ; X86-NOSSE-NEXT: popl %ebx
882 ; X86-NOSSE-NEXT: popl %ebp
883 ; X86-NOSSE-NEXT: retl $4
885 ; X64-LABEL: cnt128_optsize:
887 ; X64-NEXT: movq %rsi, %rax
888 ; X64-NEXT: shrq %rax
889 ; X64-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
890 ; X64-NEXT: andq %r8, %rax
891 ; X64-NEXT: subq %rax, %rsi
892 ; X64-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
893 ; X64-NEXT: movq %rsi, %rax
894 ; X64-NEXT: andq %rcx, %rax
895 ; X64-NEXT: shrq $2, %rsi
896 ; X64-NEXT: andq %rcx, %rsi
897 ; X64-NEXT: addq %rsi, %rax
898 ; X64-NEXT: movq %rax, %rdx
899 ; X64-NEXT: shrq $4, %rdx
900 ; X64-NEXT: addq %rax, %rdx
901 ; X64-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
902 ; X64-NEXT: andq %rsi, %rdx
903 ; X64-NEXT: movabsq $72340172838076673, %r9 # imm = 0x101010101010101
904 ; X64-NEXT: imulq %r9, %rdx
905 ; X64-NEXT: shrq $56, %rdx
906 ; X64-NEXT: movq %rdi, %rax
907 ; X64-NEXT: shrq %rax
908 ; X64-NEXT: andq %r8, %rax
909 ; X64-NEXT: subq %rax, %rdi
910 ; X64-NEXT: movq %rdi, %rax
911 ; X64-NEXT: andq %rcx, %rax
912 ; X64-NEXT: shrq $2, %rdi
913 ; X64-NEXT: andq %rdi, %rcx
914 ; X64-NEXT: addq %rax, %rcx
915 ; X64-NEXT: movq %rcx, %rax
916 ; X64-NEXT: shrq $4, %rax
917 ; X64-NEXT: addq %rcx, %rax
918 ; X64-NEXT: andq %rsi, %rax
919 ; X64-NEXT: imulq %r9, %rax
920 ; X64-NEXT: shrq $56, %rax
921 ; X64-NEXT: addq %rdx, %rax
922 ; X64-NEXT: xorl %edx, %edx
925 ; X86-POPCNT-LABEL: cnt128_optsize:
926 ; X86-POPCNT: # %bb.0:
927 ; X86-POPCNT-NEXT: pushl %esi
928 ; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
929 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
930 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
931 ; X86-POPCNT-NEXT: addl %ecx, %edx
932 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
933 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
934 ; X86-POPCNT-NEXT: addl %ecx, %esi
935 ; X86-POPCNT-NEXT: addl %edx, %esi
936 ; X86-POPCNT-NEXT: xorl %ecx, %ecx
937 ; X86-POPCNT-NEXT: movl %ecx, 12(%eax)
938 ; X86-POPCNT-NEXT: movl %ecx, 8(%eax)
939 ; X86-POPCNT-NEXT: movl %ecx, 4(%eax)
940 ; X86-POPCNT-NEXT: movl %esi, (%eax)
941 ; X86-POPCNT-NEXT: popl %esi
942 ; X86-POPCNT-NEXT: retl $4
944 ; X64-POPCNT-LABEL: cnt128_optsize:
945 ; X64-POPCNT: # %bb.0:
946 ; X64-POPCNT-NEXT: popcntq %rsi, %rcx
947 ; X64-POPCNT-NEXT: popcntq %rdi, %rax
948 ; X64-POPCNT-NEXT: addq %rcx, %rax
949 ; X64-POPCNT-NEXT: xorl %edx, %edx
950 ; X64-POPCNT-NEXT: retq
952 ; X86-SSE2-LABEL: cnt128_optsize:
954 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
955 ; X86-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
956 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
957 ; X86-SSE2-NEXT: psrlw $1, %xmm0
958 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
959 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
960 ; X86-SSE2-NEXT: psubb %xmm0, %xmm2
961 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
962 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
963 ; X86-SSE2-NEXT: pand %xmm0, %xmm3
964 ; X86-SSE2-NEXT: psrlw $2, %xmm2
965 ; X86-SSE2-NEXT: pand %xmm0, %xmm2
966 ; X86-SSE2-NEXT: paddb %xmm3, %xmm2
967 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
968 ; X86-SSE2-NEXT: psrlw $4, %xmm4
969 ; X86-SSE2-NEXT: paddb %xmm2, %xmm4
970 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
971 ; X86-SSE2-NEXT: pand %xmm2, %xmm4
972 ; X86-SSE2-NEXT: pxor %xmm3, %xmm3
973 ; X86-SSE2-NEXT: psadbw %xmm3, %xmm4
974 ; X86-SSE2-NEXT: movd %xmm4, %ecx
975 ; X86-SSE2-NEXT: movq {{.*#+}} xmm4 = mem[0],zero
976 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm5
977 ; X86-SSE2-NEXT: psrlw $1, %xmm5
978 ; X86-SSE2-NEXT: pand %xmm1, %xmm5
979 ; X86-SSE2-NEXT: psubb %xmm5, %xmm4
980 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
981 ; X86-SSE2-NEXT: pand %xmm0, %xmm1
982 ; X86-SSE2-NEXT: psrlw $2, %xmm4
983 ; X86-SSE2-NEXT: pand %xmm0, %xmm4
984 ; X86-SSE2-NEXT: paddb %xmm1, %xmm4
985 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm0
986 ; X86-SSE2-NEXT: psrlw $4, %xmm0
987 ; X86-SSE2-NEXT: paddb %xmm4, %xmm0
988 ; X86-SSE2-NEXT: pand %xmm2, %xmm0
989 ; X86-SSE2-NEXT: psadbw %xmm3, %xmm0
990 ; X86-SSE2-NEXT: movd %xmm0, %edx
991 ; X86-SSE2-NEXT: addl %ecx, %edx
992 ; X86-SSE2-NEXT: xorl %ecx, %ecx
993 ; X86-SSE2-NEXT: movl %ecx, 12(%eax)
994 ; X86-SSE2-NEXT: movl %ecx, 8(%eax)
995 ; X86-SSE2-NEXT: movl %ecx, 4(%eax)
996 ; X86-SSE2-NEXT: movl %edx, (%eax)
997 ; X86-SSE2-NEXT: retl $4
999 ; X86-SSSE3-LABEL: cnt128_optsize:
1000 ; X86-SSSE3: # %bb.0:
1001 ; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
1002 ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1003 ; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
1004 ; X86-SSSE3-NEXT: movdqa %xmm2, %xmm3
1005 ; X86-SSSE3-NEXT: pand %xmm1, %xmm3
1006 ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1007 ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm4
1008 ; X86-SSSE3-NEXT: pshufb %xmm3, %xmm4
1009 ; X86-SSSE3-NEXT: psrlw $4, %xmm2
1010 ; X86-SSSE3-NEXT: pand %xmm1, %xmm2
1011 ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm3
1012 ; X86-SSSE3-NEXT: pshufb %xmm2, %xmm3
1013 ; X86-SSSE3-NEXT: paddb %xmm4, %xmm3
1014 ; X86-SSSE3-NEXT: pxor %xmm2, %xmm2
1015 ; X86-SSSE3-NEXT: psadbw %xmm2, %xmm3
1016 ; X86-SSSE3-NEXT: movd %xmm3, %ecx
1017 ; X86-SSSE3-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
1018 ; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4
1019 ; X86-SSSE3-NEXT: pand %xmm1, %xmm4
1020 ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm5
1021 ; X86-SSSE3-NEXT: pshufb %xmm4, %xmm5
1022 ; X86-SSSE3-NEXT: psrlw $4, %xmm3
1023 ; X86-SSSE3-NEXT: pand %xmm1, %xmm3
1024 ; X86-SSSE3-NEXT: pshufb %xmm3, %xmm0
1025 ; X86-SSSE3-NEXT: paddb %xmm5, %xmm0
1026 ; X86-SSSE3-NEXT: psadbw %xmm2, %xmm0
1027 ; X86-SSSE3-NEXT: movd %xmm0, %edx
1028 ; X86-SSSE3-NEXT: addl %ecx, %edx
1029 ; X86-SSSE3-NEXT: xorl %ecx, %ecx
1030 ; X86-SSSE3-NEXT: movl %ecx, 12(%eax)
1031 ; X86-SSSE3-NEXT: movl %ecx, 8(%eax)
1032 ; X86-SSSE3-NEXT: movl %ecx, 4(%eax)
1033 ; X86-SSSE3-NEXT: movl %edx, (%eax)
1034 ; X86-SSSE3-NEXT: retl $4
1035 %cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
; cnt32_pgso: llvm.ctpop.i32 in a function tagged with !prof !14
; (function_entry_count 0 in this module's metadata).
; NOTE(review): "pgso" presumably means profile-guided size optimization - confirm.
; Expected codegen, per the assertions below:
;   - runs without popcnt: the shift/mask bit-counting expansion (masks
;     0x55555555, 0x33333333, 0xF0F0F0F), a multiply by 0x1010101 and a
;     right shift by 24.
;   - runs with +popcnt: a single popcntl.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1039 define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 {
1040 ; X86-LABEL: cnt32_pgso:
1042 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1043 ; X86-NEXT: movl %eax, %ecx
1044 ; X86-NEXT: shrl %ecx
1045 ; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
1046 ; X86-NEXT: subl %ecx, %eax
1047 ; X86-NEXT: movl %eax, %ecx
1048 ; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
1049 ; X86-NEXT: shrl $2, %eax
1050 ; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
1051 ; X86-NEXT: addl %ecx, %eax
1052 ; X86-NEXT: movl %eax, %ecx
1053 ; X86-NEXT: shrl $4, %ecx
1054 ; X86-NEXT: addl %eax, %ecx
1055 ; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
1056 ; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
1057 ; X86-NEXT: shrl $24, %eax
1060 ; X64-LABEL: cnt32_pgso:
1062 ; X64-NEXT: movl %edi, %eax
1063 ; X64-NEXT: shrl %eax
1064 ; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
1065 ; X64-NEXT: subl %eax, %edi
1066 ; X64-NEXT: movl %edi, %eax
1067 ; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
1068 ; X64-NEXT: shrl $2, %edi
1069 ; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
1070 ; X64-NEXT: addl %eax, %edi
1071 ; X64-NEXT: movl %edi, %eax
1072 ; X64-NEXT: shrl $4, %eax
1073 ; X64-NEXT: addl %edi, %eax
1074 ; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
1075 ; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
1076 ; X64-NEXT: shrl $24, %eax
1079 ; X86-POPCNT-LABEL: cnt32_pgso:
1080 ; X86-POPCNT: # %bb.0:
1081 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
1082 ; X86-POPCNT-NEXT: retl
1084 ; X64-POPCNT-LABEL: cnt32_pgso:
1085 ; X64-POPCNT: # %bb.0:
1086 ; X64-POPCNT-NEXT: popcntl %edi, %eax
1087 ; X64-POPCNT-NEXT: retq
1088 %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
; cnt64_pgso: llvm.ctpop.i64 in a function tagged with !prof !14
; (function_entry_count 0 in this module's metadata).
; NOTE(review): "pgso" presumably means profile-guided size optimization - confirm.
; Expected codegen, per the assertions below:
;   - i686 without SSE: the 32-bit shift/mask/multiply expansion is applied to
;     each half of the argument, the two counts are added, and %edx is zeroed.
;   - x86-64 without popcnt: one 64-bit expansion using movabsq constants.
;   - i686 with +popcnt: two popcntl results added, %edx zeroed.
;   - x86-64 with +popcnt: a single popcntq.
;   - i686 with sse2: vector byte-wise expansion finished with psadbw.
;   - i686 with ssse3: pshufb lookups into the [0,1,1,2,...] nibble table,
;     finished with psadbw.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1092 define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 {
1093 ; X86-NOSSE-LABEL: cnt64_pgso:
1094 ; X86-NOSSE: # %bb.0:
1095 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
1096 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
1097 ; X86-NOSSE-NEXT: movl %ecx, %edx
1098 ; X86-NOSSE-NEXT: shrl %edx
1099 ; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
1100 ; X86-NOSSE-NEXT: subl %edx, %ecx
1101 ; X86-NOSSE-NEXT: movl %ecx, %edx
1102 ; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
1103 ; X86-NOSSE-NEXT: shrl $2, %ecx
1104 ; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
1105 ; X86-NOSSE-NEXT: addl %edx, %ecx
1106 ; X86-NOSSE-NEXT: movl %ecx, %edx
1107 ; X86-NOSSE-NEXT: shrl $4, %edx
1108 ; X86-NOSSE-NEXT: addl %ecx, %edx
1109 ; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
1110 ; X86-NOSSE-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101
1111 ; X86-NOSSE-NEXT: shrl $24, %ecx
1112 ; X86-NOSSE-NEXT: movl %eax, %edx
1113 ; X86-NOSSE-NEXT: shrl %edx
1114 ; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
1115 ; X86-NOSSE-NEXT: subl %edx, %eax
1116 ; X86-NOSSE-NEXT: movl %eax, %edx
1117 ; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
1118 ; X86-NOSSE-NEXT: shrl $2, %eax
1119 ; X86-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333
1120 ; X86-NOSSE-NEXT: addl %edx, %eax
1121 ; X86-NOSSE-NEXT: movl %eax, %edx
1122 ; X86-NOSSE-NEXT: shrl $4, %edx
1123 ; X86-NOSSE-NEXT: addl %eax, %edx
1124 ; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
1125 ; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
1126 ; X86-NOSSE-NEXT: shrl $24, %eax
1127 ; X86-NOSSE-NEXT: addl %ecx, %eax
1128 ; X86-NOSSE-NEXT: xorl %edx, %edx
1129 ; X86-NOSSE-NEXT: retl
1131 ; X64-LABEL: cnt64_pgso:
1133 ; X64-NEXT: movq %rdi, %rax
1134 ; X64-NEXT: shrq %rax
1135 ; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
1136 ; X64-NEXT: andq %rax, %rcx
1137 ; X64-NEXT: subq %rcx, %rdi
1138 ; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
1139 ; X64-NEXT: movq %rdi, %rcx
1140 ; X64-NEXT: andq %rax, %rcx
1141 ; X64-NEXT: shrq $2, %rdi
1142 ; X64-NEXT: andq %rdi, %rax
1143 ; X64-NEXT: addq %rcx, %rax
1144 ; X64-NEXT: movq %rax, %rcx
1145 ; X64-NEXT: shrq $4, %rcx
1146 ; X64-NEXT: addq %rax, %rcx
1147 ; X64-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
1148 ; X64-NEXT: andq %rcx, %rdx
1149 ; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
1150 ; X64-NEXT: imulq %rdx, %rax
1151 ; X64-NEXT: shrq $56, %rax
1154 ; X86-POPCNT-LABEL: cnt64_pgso:
1155 ; X86-POPCNT: # %bb.0:
1156 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
1157 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
1158 ; X86-POPCNT-NEXT: addl %ecx, %eax
1159 ; X86-POPCNT-NEXT: xorl %edx, %edx
1160 ; X86-POPCNT-NEXT: retl
1162 ; X64-POPCNT-LABEL: cnt64_pgso:
1163 ; X64-POPCNT: # %bb.0:
1164 ; X64-POPCNT-NEXT: popcntq %rdi, %rax
1165 ; X64-POPCNT-NEXT: retq
1167 ; X86-SSE2-LABEL: cnt64_pgso:
1168 ; X86-SSE2: # %bb.0:
1169 ; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
1170 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1171 ; X86-SSE2-NEXT: psrlw $1, %xmm1
1172 ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
1173 ; X86-SSE2-NEXT: psubb %xmm1, %xmm0
1174 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1175 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1176 ; X86-SSE2-NEXT: pand %xmm1, %xmm2
1177 ; X86-SSE2-NEXT: psrlw $2, %xmm0
1178 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
1179 ; X86-SSE2-NEXT: paddb %xmm2, %xmm0
1180 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1181 ; X86-SSE2-NEXT: psrlw $4, %xmm1
1182 ; X86-SSE2-NEXT: paddb %xmm0, %xmm1
1183 ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
1184 ; X86-SSE2-NEXT: pxor %xmm0, %xmm0
1185 ; X86-SSE2-NEXT: psadbw %xmm1, %xmm0
1186 ; X86-SSE2-NEXT: movd %xmm0, %eax
1187 ; X86-SSE2-NEXT: xorl %edx, %edx
1188 ; X86-SSE2-NEXT: retl
1190 ; X86-SSSE3-LABEL: cnt64_pgso:
1191 ; X86-SSSE3: # %bb.0:
1192 ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1193 ; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1194 ; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2
1195 ; X86-SSSE3-NEXT: pand %xmm0, %xmm2
1196 ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1197 ; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4
1198 ; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4
1199 ; X86-SSSE3-NEXT: psrlw $4, %xmm1
1200 ; X86-SSSE3-NEXT: pand %xmm0, %xmm1
1201 ; X86-SSSE3-NEXT: pshufb %xmm1, %xmm3
1202 ; X86-SSSE3-NEXT: paddb %xmm4, %xmm3
1203 ; X86-SSSE3-NEXT: pxor %xmm0, %xmm0
1204 ; X86-SSSE3-NEXT: psadbw %xmm3, %xmm0
1205 ; X86-SSSE3-NEXT: movd %xmm0, %eax
1206 ; X86-SSSE3-NEXT: xorl %edx, %edx
1207 ; X86-SSSE3-NEXT: retl
1208 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
; cnt128_pgso: llvm.ctpop.i128 in a function tagged with !prof !14
; (function_entry_count 0 in this module's metadata).
; NOTE(review): "pgso" presumably means profile-guided size optimization - confirm.
; Expected codegen, per the assertions below:
;   - i686 runs: the result is written through a pointer loaded from the stack
;     (count in the low dword, zeros in the other three dwords) and the
;     function returns with "retl $4".
;       * without SSE: four 32-bit shift/mask/multiply expansions, summed.
;       * with +popcnt: four popcntl results, summed.
;       * with sse2 / ssse3: two 64-bit vector popcounts (byte-wise expansion
;         or pshufb nibble table, each finished with psadbw), summed.
;   - x86-64 without popcnt: two 64-bit expansions summed in %rax, %edx zeroed.
;   - x86-64 with +popcnt: two popcntq results added, %edx zeroed.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1212 define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
1213 ; X86-NOSSE-LABEL: cnt128_pgso:
1214 ; X86-NOSSE: # %bb.0:
1215 ; X86-NOSSE-NEXT: pushl %ebx
1216 ; X86-NOSSE-NEXT: pushl %edi
1217 ; X86-NOSSE-NEXT: pushl %esi
1218 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
1219 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
1220 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
1221 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
1222 ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi
1223 ; X86-NOSSE-NEXT: movl %edi, %ebx
1224 ; X86-NOSSE-NEXT: shrl %ebx
1225 ; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
1226 ; X86-NOSSE-NEXT: subl %ebx, %edi
1227 ; X86-NOSSE-NEXT: movl %edi, %ebx
1228 ; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
1229 ; X86-NOSSE-NEXT: shrl $2, %edi
1230 ; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
1231 ; X86-NOSSE-NEXT: addl %ebx, %edi
1232 ; X86-NOSSE-NEXT: movl %edi, %ebx
1233 ; X86-NOSSE-NEXT: shrl $4, %ebx
1234 ; X86-NOSSE-NEXT: addl %edi, %ebx
1235 ; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
1236 ; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101
1237 ; X86-NOSSE-NEXT: shrl $24, %edi
1238 ; X86-NOSSE-NEXT: movl %esi, %ebx
1239 ; X86-NOSSE-NEXT: shrl %ebx
1240 ; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
1241 ; X86-NOSSE-NEXT: subl %ebx, %esi
1242 ; X86-NOSSE-NEXT: movl %esi, %ebx
1243 ; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
1244 ; X86-NOSSE-NEXT: shrl $2, %esi
1245 ; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
1246 ; X86-NOSSE-NEXT: addl %ebx, %esi
1247 ; X86-NOSSE-NEXT: movl %esi, %ebx
1248 ; X86-NOSSE-NEXT: shrl $4, %ebx
1249 ; X86-NOSSE-NEXT: addl %esi, %ebx
1250 ; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
1251 ; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101
1252 ; X86-NOSSE-NEXT: shrl $24, %esi
1253 ; X86-NOSSE-NEXT: addl %edi, %esi
1254 ; X86-NOSSE-NEXT: movl %edx, %edi
1255 ; X86-NOSSE-NEXT: shrl %edi
1256 ; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
1257 ; X86-NOSSE-NEXT: subl %edi, %edx
1258 ; X86-NOSSE-NEXT: movl %edx, %edi
1259 ; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
1260 ; X86-NOSSE-NEXT: shrl $2, %edx
1261 ; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
1262 ; X86-NOSSE-NEXT: addl %edi, %edx
1263 ; X86-NOSSE-NEXT: movl %edx, %edi
1264 ; X86-NOSSE-NEXT: shrl $4, %edi
1265 ; X86-NOSSE-NEXT: addl %edx, %edi
1266 ; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
1267 ; X86-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101
1268 ; X86-NOSSE-NEXT: shrl $24, %edx
1269 ; X86-NOSSE-NEXT: movl %ecx, %edi
1270 ; X86-NOSSE-NEXT: shrl %edi
1271 ; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
1272 ; X86-NOSSE-NEXT: subl %edi, %ecx
1273 ; X86-NOSSE-NEXT: movl %ecx, %edi
1274 ; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
1275 ; X86-NOSSE-NEXT: shrl $2, %ecx
1276 ; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
1277 ; X86-NOSSE-NEXT: addl %edi, %ecx
1278 ; X86-NOSSE-NEXT: movl %ecx, %edi
1279 ; X86-NOSSE-NEXT: shrl $4, %edi
1280 ; X86-NOSSE-NEXT: addl %ecx, %edi
1281 ; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
1282 ; X86-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101
1283 ; X86-NOSSE-NEXT: shrl $24, %ecx
1284 ; X86-NOSSE-NEXT: addl %edx, %ecx
1285 ; X86-NOSSE-NEXT: addl %esi, %ecx
1286 ; X86-NOSSE-NEXT: xorl %edx, %edx
1287 ; X86-NOSSE-NEXT: movl %edx, 12(%eax)
1288 ; X86-NOSSE-NEXT: movl %edx, 8(%eax)
1289 ; X86-NOSSE-NEXT: movl %edx, 4(%eax)
1290 ; X86-NOSSE-NEXT: movl %ecx, (%eax)
1291 ; X86-NOSSE-NEXT: popl %esi
1292 ; X86-NOSSE-NEXT: popl %edi
1293 ; X86-NOSSE-NEXT: popl %ebx
1294 ; X86-NOSSE-NEXT: retl $4
1296 ; X64-LABEL: cnt128_pgso:
1298 ; X64-NEXT: movq %rsi, %rax
1299 ; X64-NEXT: shrq %rax
1300 ; X64-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
1301 ; X64-NEXT: andq %r8, %rax
1302 ; X64-NEXT: subq %rax, %rsi
1303 ; X64-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
1304 ; X64-NEXT: movq %rsi, %rax
1305 ; X64-NEXT: andq %rcx, %rax
1306 ; X64-NEXT: shrq $2, %rsi
1307 ; X64-NEXT: andq %rcx, %rsi
1308 ; X64-NEXT: addq %rsi, %rax
1309 ; X64-NEXT: movq %rax, %rdx
1310 ; X64-NEXT: shrq $4, %rdx
1311 ; X64-NEXT: addq %rax, %rdx
1312 ; X64-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
1313 ; X64-NEXT: andq %rsi, %rdx
1314 ; X64-NEXT: movabsq $72340172838076673, %r9 # imm = 0x101010101010101
1315 ; X64-NEXT: imulq %r9, %rdx
1316 ; X64-NEXT: shrq $56, %rdx
1317 ; X64-NEXT: movq %rdi, %rax
1318 ; X64-NEXT: shrq %rax
1319 ; X64-NEXT: andq %r8, %rax
1320 ; X64-NEXT: subq %rax, %rdi
1321 ; X64-NEXT: movq %rdi, %rax
1322 ; X64-NEXT: andq %rcx, %rax
1323 ; X64-NEXT: shrq $2, %rdi
1324 ; X64-NEXT: andq %rdi, %rcx
1325 ; X64-NEXT: addq %rax, %rcx
1326 ; X64-NEXT: movq %rcx, %rax
1327 ; X64-NEXT: shrq $4, %rax
1328 ; X64-NEXT: addq %rcx, %rax
1329 ; X64-NEXT: andq %rsi, %rax
1330 ; X64-NEXT: imulq %r9, %rax
1331 ; X64-NEXT: shrq $56, %rax
1332 ; X64-NEXT: addq %rdx, %rax
1333 ; X64-NEXT: xorl %edx, %edx
1336 ; X86-POPCNT-LABEL: cnt128_pgso:
1337 ; X86-POPCNT: # %bb.0:
1338 ; X86-POPCNT-NEXT: pushl %esi
1339 ; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
1340 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
1341 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
1342 ; X86-POPCNT-NEXT: addl %ecx, %edx
1343 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
1344 ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
1345 ; X86-POPCNT-NEXT: addl %ecx, %esi
1346 ; X86-POPCNT-NEXT: addl %edx, %esi
1347 ; X86-POPCNT-NEXT: xorl %ecx, %ecx
1348 ; X86-POPCNT-NEXT: movl %ecx, 12(%eax)
1349 ; X86-POPCNT-NEXT: movl %ecx, 8(%eax)
1350 ; X86-POPCNT-NEXT: movl %ecx, 4(%eax)
1351 ; X86-POPCNT-NEXT: movl %esi, (%eax)
1352 ; X86-POPCNT-NEXT: popl %esi
1353 ; X86-POPCNT-NEXT: retl $4
1355 ; X64-POPCNT-LABEL: cnt128_pgso:
1356 ; X64-POPCNT: # %bb.0:
1357 ; X64-POPCNT-NEXT: popcntq %rsi, %rcx
1358 ; X64-POPCNT-NEXT: popcntq %rdi, %rax
1359 ; X64-POPCNT-NEXT: addq %rcx, %rax
1360 ; X64-POPCNT-NEXT: xorl %edx, %edx
1361 ; X64-POPCNT-NEXT: retq
1363 ; X86-SSE2-LABEL: cnt128_pgso:
1364 ; X86-SSE2: # %bb.0:
1365 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1366 ; X86-SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
1367 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
1368 ; X86-SSE2-NEXT: psrlw $1, %xmm0
1369 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
1370 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
1371 ; X86-SSE2-NEXT: psubb %xmm0, %xmm2
1372 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
1373 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
1374 ; X86-SSE2-NEXT: pand %xmm0, %xmm3
1375 ; X86-SSE2-NEXT: psrlw $2, %xmm2
1376 ; X86-SSE2-NEXT: pand %xmm0, %xmm2
1377 ; X86-SSE2-NEXT: paddb %xmm3, %xmm2
1378 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
1379 ; X86-SSE2-NEXT: psrlw $4, %xmm4
1380 ; X86-SSE2-NEXT: paddb %xmm2, %xmm4
1381 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1382 ; X86-SSE2-NEXT: pand %xmm2, %xmm4
1383 ; X86-SSE2-NEXT: pxor %xmm3, %xmm3
1384 ; X86-SSE2-NEXT: psadbw %xmm3, %xmm4
1385 ; X86-SSE2-NEXT: movd %xmm4, %ecx
1386 ; X86-SSE2-NEXT: movq {{.*#+}} xmm4 = mem[0],zero
1387 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm5
1388 ; X86-SSE2-NEXT: psrlw $1, %xmm5
1389 ; X86-SSE2-NEXT: pand %xmm1, %xmm5
1390 ; X86-SSE2-NEXT: psubb %xmm5, %xmm4
1391 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
1392 ; X86-SSE2-NEXT: pand %xmm0, %xmm1
1393 ; X86-SSE2-NEXT: psrlw $2, %xmm4
1394 ; X86-SSE2-NEXT: pand %xmm0, %xmm4
1395 ; X86-SSE2-NEXT: paddb %xmm1, %xmm4
1396 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm0
1397 ; X86-SSE2-NEXT: psrlw $4, %xmm0
1398 ; X86-SSE2-NEXT: paddb %xmm4, %xmm0
1399 ; X86-SSE2-NEXT: pand %xmm2, %xmm0
1400 ; X86-SSE2-NEXT: psadbw %xmm3, %xmm0
1401 ; X86-SSE2-NEXT: movd %xmm0, %edx
1402 ; X86-SSE2-NEXT: addl %ecx, %edx
1403 ; X86-SSE2-NEXT: xorl %ecx, %ecx
1404 ; X86-SSE2-NEXT: movl %ecx, 12(%eax)
1405 ; X86-SSE2-NEXT: movl %ecx, 8(%eax)
1406 ; X86-SSE2-NEXT: movl %ecx, 4(%eax)
1407 ; X86-SSE2-NEXT: movl %edx, (%eax)
1408 ; X86-SSE2-NEXT: retl $4
1410 ; X86-SSSE3-LABEL: cnt128_pgso:
1411 ; X86-SSSE3: # %bb.0:
1412 ; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
1413 ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1414 ; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
1415 ; X86-SSSE3-NEXT: movdqa %xmm2, %xmm3
1416 ; X86-SSSE3-NEXT: pand %xmm1, %xmm3
1417 ; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1418 ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm4
1419 ; X86-SSSE3-NEXT: pshufb %xmm3, %xmm4
1420 ; X86-SSSE3-NEXT: psrlw $4, %xmm2
1421 ; X86-SSSE3-NEXT: pand %xmm1, %xmm2
1422 ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm3
1423 ; X86-SSSE3-NEXT: pshufb %xmm2, %xmm3
1424 ; X86-SSSE3-NEXT: paddb %xmm4, %xmm3
1425 ; X86-SSSE3-NEXT: pxor %xmm2, %xmm2
1426 ; X86-SSSE3-NEXT: psadbw %xmm2, %xmm3
1427 ; X86-SSSE3-NEXT: movd %xmm3, %ecx
1428 ; X86-SSSE3-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
1429 ; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4
1430 ; X86-SSSE3-NEXT: pand %xmm1, %xmm4
1431 ; X86-SSSE3-NEXT: movdqa %xmm0, %xmm5
1432 ; X86-SSSE3-NEXT: pshufb %xmm4, %xmm5
1433 ; X86-SSSE3-NEXT: psrlw $4, %xmm3
1434 ; X86-SSSE3-NEXT: pand %xmm1, %xmm3
1435 ; X86-SSSE3-NEXT: pshufb %xmm3, %xmm0
1436 ; X86-SSSE3-NEXT: paddb %xmm5, %xmm0
1437 ; X86-SSSE3-NEXT: psadbw %xmm2, %xmm0
1438 ; X86-SSSE3-NEXT: movd %xmm0, %edx
1439 ; X86-SSSE3-NEXT: addl %ecx, %edx
1440 ; X86-SSSE3-NEXT: xorl %ecx, %ecx
1441 ; X86-SSSE3-NEXT: movl %ecx, 12(%eax)
1442 ; X86-SSSE3-NEXT: movl %ecx, 8(%eax)
1443 ; X86-SSSE3-NEXT: movl %ecx, 4(%eax)
1444 ; X86-SSSE3-NEXT: movl %edx, (%eax)
1445 ; X86-SSSE3-NEXT: retl $4
1446 %cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
; popcount_zext_i32: the i16 zeroext argument is widened to i32 first and
; llvm.ctpop.i32 is then applied to the widened value.
; Expected codegen, per the assertions below:
;   - runs without popcnt: the 32-bit shift/mask/multiply expansion, except
;     that the first mask is the 16-bit constant 0x5555 rather than 0x55555555.
;   - runs with +popcnt: a single popcntl (i686 loads the argument with
;     movzwl first; x86-64 uses %edi directly).
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1450 define i32 @popcount_zext_i32(i16 zeroext %x) {
1451 ; X86-LABEL: popcount_zext_i32:
1453 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
1454 ; X86-NEXT: movl %eax, %ecx
1455 ; X86-NEXT: shrl %ecx
1456 ; X86-NEXT: andl $21845, %ecx # imm = 0x5555
1457 ; X86-NEXT: subl %ecx, %eax
1458 ; X86-NEXT: movl %eax, %ecx
1459 ; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
1460 ; X86-NEXT: shrl $2, %eax
1461 ; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
1462 ; X86-NEXT: addl %ecx, %eax
1463 ; X86-NEXT: movl %eax, %ecx
1464 ; X86-NEXT: shrl $4, %ecx
1465 ; X86-NEXT: addl %eax, %ecx
1466 ; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
1467 ; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
1468 ; X86-NEXT: shrl $24, %eax
1471 ; X64-LABEL: popcount_zext_i32:
1473 ; X64-NEXT: movl %edi, %eax
1474 ; X64-NEXT: shrl %eax
1475 ; X64-NEXT: andl $21845, %eax # imm = 0x5555
1476 ; X64-NEXT: subl %eax, %edi
1477 ; X64-NEXT: movl %edi, %eax
1478 ; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
1479 ; X64-NEXT: shrl $2, %edi
1480 ; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
1481 ; X64-NEXT: addl %eax, %edi
1482 ; X64-NEXT: movl %edi, %eax
1483 ; X64-NEXT: shrl $4, %eax
1484 ; X64-NEXT: addl %edi, %eax
1485 ; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
1486 ; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
1487 ; X64-NEXT: shrl $24, %eax
1490 ; X86-POPCNT-LABEL: popcount_zext_i32:
1491 ; X86-POPCNT: # %bb.0:
1492 ; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax
1493 ; X86-POPCNT-NEXT: popcntl %eax, %eax
1494 ; X86-POPCNT-NEXT: retl
1496 ; X64-POPCNT-LABEL: popcount_zext_i32:
1497 ; X64-POPCNT: # %bb.0:
1498 ; X64-POPCNT-NEXT: popcntl %edi, %eax
1499 ; X64-POPCNT-NEXT: retq
1500 %z = zext i16 %x to i32
1501 %cnt = tail call i32 @llvm.ctpop.i32(i32 %z)
; popcount_i16_zext: llvm.ctpop.i16 is applied to the i16 zeroext argument and
; the i16 count is then zero-extended to i32 (the reverse order of
; popcount_zext_i32, which widens before counting).
; Expected codegen, per the assertions below:
;   - runs without popcnt: a 16-bit-wide expansion (masks 0x5555, 0x3333,
;     0xF0F), a shift-by-8 plus add to combine the two byte counts, and a
;     final movzbl.
;   - runs with +popcnt: a single popcntl on the 32-bit register.
; The assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1505 define i32 @popcount_i16_zext(i16 zeroext %x) {
1506 ; X86-LABEL: popcount_i16_zext:
1508 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
1509 ; X86-NEXT: movl %eax, %ecx
1510 ; X86-NEXT: shrl %ecx
1511 ; X86-NEXT: andl $21845, %ecx # imm = 0x5555
1512 ; X86-NEXT: subl %ecx, %eax
1513 ; X86-NEXT: movl %eax, %ecx
1514 ; X86-NEXT: andl $13107, %ecx # imm = 0x3333
1515 ; X86-NEXT: shrl $2, %eax
1516 ; X86-NEXT: andl $13107, %eax # imm = 0x3333
1517 ; X86-NEXT: addl %ecx, %eax
1518 ; X86-NEXT: movl %eax, %ecx
1519 ; X86-NEXT: shrl $4, %ecx
1520 ; X86-NEXT: addl %eax, %ecx
1521 ; X86-NEXT: andl $3855, %ecx # imm = 0xF0F
1522 ; X86-NEXT: movl %ecx, %eax
1523 ; X86-NEXT: shrl $8, %eax
1524 ; X86-NEXT: addl %ecx, %eax
1525 ; X86-NEXT: movzbl %al, %eax
1528 ; X64-LABEL: popcount_i16_zext:
1530 ; X64-NEXT: movl %edi, %eax
1531 ; X64-NEXT: shrl %eax
1532 ; X64-NEXT: andl $21845, %eax # imm = 0x5555
1533 ; X64-NEXT: subl %eax, %edi
1534 ; X64-NEXT: movl %edi, %eax
1535 ; X64-NEXT: andl $13107, %eax # imm = 0x3333
1536 ; X64-NEXT: shrl $2, %edi
1537 ; X64-NEXT: andl $13107, %edi # imm = 0x3333
1538 ; X64-NEXT: addl %eax, %edi
1539 ; X64-NEXT: movl %edi, %eax
1540 ; X64-NEXT: shrl $4, %eax
1541 ; X64-NEXT: addl %edi, %eax
1542 ; X64-NEXT: andl $3855, %eax # imm = 0xF0F
1543 ; X64-NEXT: movl %eax, %ecx
1544 ; X64-NEXT: shrl $8, %ecx
1545 ; X64-NEXT: addl %eax, %ecx
1546 ; X64-NEXT: movzbl %cl, %eax
1549 ; X86-POPCNT-LABEL: popcount_i16_zext:
1550 ; X86-POPCNT: # %bb.0:
1551 ; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax
1552 ; X86-POPCNT-NEXT: popcntl %eax, %eax
1553 ; X86-POPCNT-NEXT: retl
1555 ; X64-POPCNT-LABEL: popcount_i16_zext:
1556 ; X64-POPCNT: # %bb.0:
1557 ; X64-POPCNT-NEXT: popcntl %edi, %eax
1558 ; X64-POPCNT-NEXT: retq
1559 %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
1560 %z = zext i16 %cnt to i32
; Declarations of the llvm.ctpop intrinsic overloads (i8 through i128) that
; the test functions in this file call.
1564 declare i8 @llvm.ctpop.i8(i8) nounwind readnone
1565 declare i16 @llvm.ctpop.i16(i16) nounwind readnone
1566 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
1567 declare i64 @llvm.ctpop.i64(i64) nounwind readnone
1568 declare i128 @llvm.ctpop.i128(i128) nounwind readnone
; Module-level profile metadata: !0 attaches a "ProfileSummary" module flag
; (format InstrProf) whose fields are given by !2 through !13.
; !14 is a function_entry_count of 0; the *_pgso functions reference it
; through "!prof !14".
; NOTE(review): the zero entry count is presumably what marks those functions
; as cold for profile-guided size optimization - confirm.
1570 !llvm.module.flags = !{!0}
1571 !0 = !{i32 1, !"ProfileSummary", !1}
1572 !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
1573 !2 = !{!"ProfileFormat", !"InstrProf"}
1574 !3 = !{!"TotalCount", i64 10000}
1575 !4 = !{!"MaxCount", i64 10}
1576 !5 = !{!"MaxInternalCount", i64 1}
1577 !6 = !{!"MaxFunctionCount", i64 1000}
1578 !7 = !{!"NumCounts", i64 3}
1579 !8 = !{!"NumFunctions", i64 3}
1580 !9 = !{!"DetailedSummary", !10}
1581 !10 = !{!11, !12, !13}
1582 !11 = !{i32 10000, i64 100, i32 1}
1583 !12 = !{i32 999000, i64 100, i32 1}
1584 !13 = !{i32 999999, i64 1, i32 2}
1585 !14 = !{!"function_entry_count", i64 0}