1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X32,X32-NOSSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
4 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X32-POPCNT
5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X64-POPCNT
6 ; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X32,X32-SSE2
7 ; RUN: llc < %s -mtriple=i686-unknown -mattr=ssse3 | FileCheck %s --check-prefixes=X32,X32-SSSE3
; cnt8: population count of an i8 argument via @llvm.ctpop.i8.
; Without +popcnt the expected code is the byte-wide bit-twiddling expansion
; (masks 85, 51 and 15 combined with shifts by 2 and 4); with +popcnt the
; argument is zero-extended (movzbl) and counted with a single popcntl.
; NOTE(review): the ret and closing brace are not visible in this excerpt;
; presumably the function returns %cnt - confirm against the full file.
9 define i8 @cnt8(i8 %x) nounwind readnone {
12 ; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
13 ; X32-NEXT: movl %ecx, %eax
15 ; X32-NEXT: andb $85, %al
16 ; X32-NEXT: subb %al, %cl
17 ; X32-NEXT: movl %ecx, %eax
18 ; X32-NEXT: andb $51, %al
19 ; X32-NEXT: shrb $2, %cl
20 ; X32-NEXT: andb $51, %cl
21 ; X32-NEXT: addb %al, %cl
22 ; X32-NEXT: movl %ecx, %eax
23 ; X32-NEXT: shrb $4, %al
24 ; X32-NEXT: addb %cl, %al
25 ; X32-NEXT: andb $15, %al
30 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
31 ; X64-NEXT: movl %edi, %eax
33 ; X64-NEXT: andb $85, %al
34 ; X64-NEXT: subb %al, %dil
35 ; X64-NEXT: movl %edi, %eax
36 ; X64-NEXT: andb $51, %al
37 ; X64-NEXT: shrb $2, %dil
38 ; X64-NEXT: andb $51, %dil
39 ; X64-NEXT: addb %al, %dil
40 ; X64-NEXT: movl %edi, %eax
41 ; X64-NEXT: shrb $4, %al
42 ; X64-NEXT: addl %edi, %eax
43 ; X64-NEXT: andb $15, %al
44 ; X64-NEXT: # kill: def $al killed $al killed $eax
47 ; X32-POPCNT-LABEL: cnt8:
48 ; X32-POPCNT: # %bb.0:
49 ; X32-POPCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
50 ; X32-POPCNT-NEXT: popcntl %eax, %eax
51 ; X32-POPCNT-NEXT: # kill: def $al killed $al killed $eax
52 ; X32-POPCNT-NEXT: retl
54 ; X64-POPCNT-LABEL: cnt8:
55 ; X64-POPCNT: # %bb.0:
56 ; X64-POPCNT-NEXT: movzbl %dil, %eax
57 ; X64-POPCNT-NEXT: popcntl %eax, %eax
58 ; X64-POPCNT-NEXT: # kill: def $al killed $al killed $eax
59 ; X64-POPCNT-NEXT: retq
60 %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
; cnt16: population count of an i16 argument via @llvm.ctpop.i16.
; Without +popcnt the expected code masks with 0x5555, 0x3333 and 0xF0F, then
; folds the two byte sums together with shll $8 / addl and extracts the high
; byte (movzbl from %ah or %ch). With +popcnt a single popcntw is expected.
; NOTE(review): the ret and closing brace are not visible in this excerpt;
; presumably the function returns %cnt - confirm against the full file.
64 define i16 @cnt16(i16 %x) nounwind readnone {
67 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
68 ; X32-NEXT: movl %eax, %ecx
70 ; X32-NEXT: andl $21845, %ecx # imm = 0x5555
71 ; X32-NEXT: subl %ecx, %eax
72 ; X32-NEXT: movl %eax, %ecx
73 ; X32-NEXT: andl $13107, %ecx # imm = 0x3333
74 ; X32-NEXT: shrl $2, %eax
75 ; X32-NEXT: andl $13107, %eax # imm = 0x3333
76 ; X32-NEXT: addl %ecx, %eax
77 ; X32-NEXT: movl %eax, %ecx
78 ; X32-NEXT: shrl $4, %ecx
79 ; X32-NEXT: addl %eax, %ecx
80 ; X32-NEXT: andl $3855, %ecx # imm = 0xF0F
81 ; X32-NEXT: movl %ecx, %eax
82 ; X32-NEXT: shll $8, %eax
83 ; X32-NEXT: addl %ecx, %eax
84 ; X32-NEXT: movzbl %ah, %eax
85 ; X32-NEXT: # kill: def $ax killed $ax killed $eax
90 ; X64-NEXT: movl %edi, %eax
92 ; X64-NEXT: andl $21845, %eax # imm = 0x5555
93 ; X64-NEXT: subl %eax, %edi
94 ; X64-NEXT: movl %edi, %eax
95 ; X64-NEXT: andl $13107, %eax # imm = 0x3333
96 ; X64-NEXT: shrl $2, %edi
97 ; X64-NEXT: andl $13107, %edi # imm = 0x3333
98 ; X64-NEXT: addl %eax, %edi
99 ; X64-NEXT: movl %edi, %eax
100 ; X64-NEXT: shrl $4, %eax
101 ; X64-NEXT: addl %edi, %eax
102 ; X64-NEXT: andl $3855, %eax # imm = 0xF0F
103 ; X64-NEXT: movl %eax, %ecx
104 ; X64-NEXT: shll $8, %ecx
105 ; X64-NEXT: addl %eax, %ecx
106 ; X64-NEXT: movzbl %ch, %eax
107 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
110 ; X32-POPCNT-LABEL: cnt16:
111 ; X32-POPCNT: # %bb.0:
112 ; X32-POPCNT-NEXT: popcntw {{[0-9]+}}(%esp), %ax
113 ; X32-POPCNT-NEXT: retl
115 ; X64-POPCNT-LABEL: cnt16:
116 ; X64-POPCNT: # %bb.0:
117 ; X64-POPCNT-NEXT: popcntw %di, %ax
118 ; X64-POPCNT-NEXT: retq
119 %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
; cnt32: population count of an i32 argument via @llvm.ctpop.i32.
; Without +popcnt the expected code masks with 0x55555555, 0x33333333 and
; 0x0F0F0F0F, multiplies by 0x01010101 and shifts right by 24 to collect the
; byte sums. With +popcnt a single popcntl is expected.
; NOTE(review): the ret and closing brace are not visible in this excerpt;
; presumably the function returns %cnt - confirm against the full file.
123 define i32 @cnt32(i32 %x) nounwind readnone {
126 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
127 ; X32-NEXT: movl %eax, %ecx
128 ; X32-NEXT: shrl %ecx
129 ; X32-NEXT: andl $1431655765, %ecx # imm = 0x55555555
130 ; X32-NEXT: subl %ecx, %eax
131 ; X32-NEXT: movl %eax, %ecx
132 ; X32-NEXT: andl $858993459, %ecx # imm = 0x33333333
133 ; X32-NEXT: shrl $2, %eax
134 ; X32-NEXT: andl $858993459, %eax # imm = 0x33333333
135 ; X32-NEXT: addl %ecx, %eax
136 ; X32-NEXT: movl %eax, %ecx
137 ; X32-NEXT: shrl $4, %ecx
138 ; X32-NEXT: addl %eax, %ecx
139 ; X32-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
140 ; X32-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
141 ; X32-NEXT: shrl $24, %eax
146 ; X64-NEXT: movl %edi, %eax
147 ; X64-NEXT: shrl %eax
148 ; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
149 ; X64-NEXT: subl %eax, %edi
150 ; X64-NEXT: movl %edi, %eax
151 ; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
152 ; X64-NEXT: shrl $2, %edi
153 ; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
154 ; X64-NEXT: addl %eax, %edi
155 ; X64-NEXT: movl %edi, %eax
156 ; X64-NEXT: shrl $4, %eax
157 ; X64-NEXT: addl %edi, %eax
158 ; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
159 ; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
160 ; X64-NEXT: shrl $24, %eax
163 ; X32-POPCNT-LABEL: cnt32:
164 ; X32-POPCNT: # %bb.0:
165 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
166 ; X32-POPCNT-NEXT: retl
168 ; X64-POPCNT-LABEL: cnt32:
169 ; X64-POPCNT: # %bb.0:
170 ; X64-POPCNT-NEXT: popcntl %edi, %eax
171 ; X64-POPCNT-NEXT: retq
172 %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
; cnt64: population count of an i64 argument via @llvm.ctpop.i64.
; Expected lowering per run configuration, as recorded in the checks below:
;   * i686 without SSE: each 32-bit half gets the scalar mask/multiply
;     expansion, the two counts are added, and %edx is zeroed.
;   * x86_64 without popcnt: 64-bit masks are materialized with movabsq,
;     followed by imulq and shrq $56.
;   * +popcnt: one popcntq on x86_64, or two popcntl plus an add on i686.
;   * i686 with sse2: vector bit-twiddling (psrlw/pand/psubb/paddb) reduced
;     with psadbw.
;   * i686 with ssse3: nibble lookup through pshufb against the table
;     [0,1,1,2,...,4], reduced with psadbw.
; NOTE(review): the ret and closing brace are not visible in this excerpt;
; presumably the function returns %cnt - confirm against the full file.
176 define i64 @cnt64(i64 %x) nounwind readnone {
177 ; X32-NOSSE-LABEL: cnt64:
178 ; X32-NOSSE: # %bb.0:
179 ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
180 ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
181 ; X32-NOSSE-NEXT: movl %ecx, %edx
182 ; X32-NOSSE-NEXT: shrl %edx
183 ; X32-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
184 ; X32-NOSSE-NEXT: subl %edx, %ecx
185 ; X32-NOSSE-NEXT: movl %ecx, %edx
186 ; X32-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
187 ; X32-NOSSE-NEXT: shrl $2, %ecx
188 ; X32-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
189 ; X32-NOSSE-NEXT: addl %edx, %ecx
190 ; X32-NOSSE-NEXT: movl %ecx, %edx
191 ; X32-NOSSE-NEXT: shrl $4, %edx
192 ; X32-NOSSE-NEXT: addl %ecx, %edx
193 ; X32-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
194 ; X32-NOSSE-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101
195 ; X32-NOSSE-NEXT: shrl $24, %ecx
196 ; X32-NOSSE-NEXT: movl %eax, %edx
197 ; X32-NOSSE-NEXT: shrl %edx
198 ; X32-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
199 ; X32-NOSSE-NEXT: subl %edx, %eax
200 ; X32-NOSSE-NEXT: movl %eax, %edx
201 ; X32-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
202 ; X32-NOSSE-NEXT: shrl $2, %eax
203 ; X32-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333
204 ; X32-NOSSE-NEXT: addl %edx, %eax
205 ; X32-NOSSE-NEXT: movl %eax, %edx
206 ; X32-NOSSE-NEXT: shrl $4, %edx
207 ; X32-NOSSE-NEXT: addl %eax, %edx
208 ; X32-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
209 ; X32-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
210 ; X32-NOSSE-NEXT: shrl $24, %eax
211 ; X32-NOSSE-NEXT: addl %ecx, %eax
212 ; X32-NOSSE-NEXT: xorl %edx, %edx
213 ; X32-NOSSE-NEXT: retl
217 ; X64-NEXT: movq %rdi, %rax
218 ; X64-NEXT: shrq %rax
219 ; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
220 ; X64-NEXT: andq %rax, %rcx
221 ; X64-NEXT: subq %rcx, %rdi
222 ; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
223 ; X64-NEXT: movq %rdi, %rcx
224 ; X64-NEXT: andq %rax, %rcx
225 ; X64-NEXT: shrq $2, %rdi
226 ; X64-NEXT: andq %rax, %rdi
227 ; X64-NEXT: addq %rcx, %rdi
228 ; X64-NEXT: movq %rdi, %rax
229 ; X64-NEXT: shrq $4, %rax
230 ; X64-NEXT: addq %rdi, %rax
231 ; X64-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
232 ; X64-NEXT: andq %rax, %rcx
233 ; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
234 ; X64-NEXT: imulq %rcx, %rax
235 ; X64-NEXT: shrq $56, %rax
238 ; X32-POPCNT-LABEL: cnt64:
239 ; X32-POPCNT: # %bb.0:
240 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
241 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
242 ; X32-POPCNT-NEXT: addl %ecx, %eax
243 ; X32-POPCNT-NEXT: xorl %edx, %edx
244 ; X32-POPCNT-NEXT: retl
246 ; X64-POPCNT-LABEL: cnt64:
247 ; X64-POPCNT: # %bb.0:
248 ; X64-POPCNT-NEXT: popcntq %rdi, %rax
249 ; X64-POPCNT-NEXT: retq
251 ; X32-SSE2-LABEL: cnt64:
253 ; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
254 ; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
255 ; X32-SSE2-NEXT: psrlw $1, %xmm1
256 ; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
257 ; X32-SSE2-NEXT: psubb %xmm1, %xmm0
258 ; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
259 ; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
260 ; X32-SSE2-NEXT: pand %xmm1, %xmm2
261 ; X32-SSE2-NEXT: psrlw $2, %xmm0
262 ; X32-SSE2-NEXT: pand %xmm1, %xmm0
263 ; X32-SSE2-NEXT: paddb %xmm2, %xmm0
264 ; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
265 ; X32-SSE2-NEXT: psrlw $4, %xmm1
266 ; X32-SSE2-NEXT: paddb %xmm0, %xmm1
267 ; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
268 ; X32-SSE2-NEXT: pxor %xmm0, %xmm0
269 ; X32-SSE2-NEXT: psadbw %xmm1, %xmm0
270 ; X32-SSE2-NEXT: movd %xmm0, %eax
271 ; X32-SSE2-NEXT: xorl %edx, %edx
272 ; X32-SSE2-NEXT: retl
274 ; X32-SSSE3-LABEL: cnt64:
275 ; X32-SSSE3: # %bb.0:
276 ; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
277 ; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
278 ; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2
279 ; X32-SSSE3-NEXT: pand %xmm0, %xmm2
280 ; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
281 ; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4
282 ; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4
283 ; X32-SSSE3-NEXT: psrlw $4, %xmm1
284 ; X32-SSSE3-NEXT: pand %xmm0, %xmm1
285 ; X32-SSSE3-NEXT: pshufb %xmm1, %xmm3
286 ; X32-SSSE3-NEXT: paddb %xmm4, %xmm3
287 ; X32-SSSE3-NEXT: pxor %xmm0, %xmm0
288 ; X32-SSSE3-NEXT: psadbw %xmm3, %xmm0
289 ; X32-SSSE3-NEXT: movd %xmm0, %eax
290 ; X32-SSSE3-NEXT: xorl %edx, %edx
291 ; X32-SSSE3-NEXT: retl
292 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
; cnt128: population count of an i128 argument via @llvm.ctpop.i128.
; Expected lowering per run configuration, as recorded in the checks below:
;   * x86_64: the two 64-bit halves (%rdi, %rsi) are counted separately
;     (scalar expansion, or popcntq with +popcnt), added, and %rdx is zeroed.
;   * i686: the partial counts are summed and stored through the pointer held
;     in %eax, the words at offsets 4, 8 and 12 are zeroed, and the function
;     returns with retl $4. Without SSE or with +popcnt the input is handled as
;     four 32-bit words; with sse2/ssse3 it is handled as two 64-bit vector
;     loads, each reduced with psadbw.
; NOTE(review): the ret and closing brace are not visible in this excerpt;
; presumably the function returns %cnt - confirm against the full file.
296 define i128 @cnt128(i128 %x) nounwind readnone {
297 ; X32-NOSSE-LABEL: cnt128:
298 ; X32-NOSSE: # %bb.0:
299 ; X32-NOSSE-NEXT: pushl %ebx
300 ; X32-NOSSE-NEXT: pushl %edi
301 ; X32-NOSSE-NEXT: pushl %esi
302 ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
303 ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
304 ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
305 ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
306 ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi
307 ; X32-NOSSE-NEXT: movl %edi, %ebx
308 ; X32-NOSSE-NEXT: shrl %ebx
309 ; X32-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
310 ; X32-NOSSE-NEXT: subl %ebx, %edi
311 ; X32-NOSSE-NEXT: movl %edi, %ebx
312 ; X32-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
313 ; X32-NOSSE-NEXT: shrl $2, %edi
314 ; X32-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
315 ; X32-NOSSE-NEXT: addl %ebx, %edi
316 ; X32-NOSSE-NEXT: movl %edi, %ebx
317 ; X32-NOSSE-NEXT: shrl $4, %ebx
318 ; X32-NOSSE-NEXT: addl %edi, %ebx
319 ; X32-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
320 ; X32-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101
321 ; X32-NOSSE-NEXT: shrl $24, %edi
322 ; X32-NOSSE-NEXT: movl %esi, %ebx
323 ; X32-NOSSE-NEXT: shrl %ebx
324 ; X32-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
325 ; X32-NOSSE-NEXT: subl %ebx, %esi
326 ; X32-NOSSE-NEXT: movl %esi, %ebx
327 ; X32-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
328 ; X32-NOSSE-NEXT: shrl $2, %esi
329 ; X32-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
330 ; X32-NOSSE-NEXT: addl %ebx, %esi
331 ; X32-NOSSE-NEXT: movl %esi, %ebx
332 ; X32-NOSSE-NEXT: shrl $4, %ebx
333 ; X32-NOSSE-NEXT: addl %esi, %ebx
334 ; X32-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
335 ; X32-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101
336 ; X32-NOSSE-NEXT: shrl $24, %esi
337 ; X32-NOSSE-NEXT: addl %edi, %esi
338 ; X32-NOSSE-NEXT: movl %edx, %edi
339 ; X32-NOSSE-NEXT: shrl %edi
340 ; X32-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
341 ; X32-NOSSE-NEXT: subl %edi, %edx
342 ; X32-NOSSE-NEXT: movl %edx, %edi
343 ; X32-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
344 ; X32-NOSSE-NEXT: shrl $2, %edx
345 ; X32-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
346 ; X32-NOSSE-NEXT: addl %edi, %edx
347 ; X32-NOSSE-NEXT: movl %edx, %edi
348 ; X32-NOSSE-NEXT: shrl $4, %edi
349 ; X32-NOSSE-NEXT: addl %edx, %edi
350 ; X32-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
351 ; X32-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101
352 ; X32-NOSSE-NEXT: shrl $24, %edx
353 ; X32-NOSSE-NEXT: movl %ecx, %edi
354 ; X32-NOSSE-NEXT: shrl %edi
355 ; X32-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
356 ; X32-NOSSE-NEXT: subl %edi, %ecx
357 ; X32-NOSSE-NEXT: movl %ecx, %edi
358 ; X32-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
359 ; X32-NOSSE-NEXT: shrl $2, %ecx
360 ; X32-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
361 ; X32-NOSSE-NEXT: addl %edi, %ecx
362 ; X32-NOSSE-NEXT: movl %ecx, %edi
363 ; X32-NOSSE-NEXT: shrl $4, %edi
364 ; X32-NOSSE-NEXT: addl %ecx, %edi
365 ; X32-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
366 ; X32-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101
367 ; X32-NOSSE-NEXT: shrl $24, %ecx
368 ; X32-NOSSE-NEXT: addl %edx, %ecx
369 ; X32-NOSSE-NEXT: addl %esi, %ecx
370 ; X32-NOSSE-NEXT: movl %ecx, (%eax)
371 ; X32-NOSSE-NEXT: movl $0, 12(%eax)
372 ; X32-NOSSE-NEXT: movl $0, 8(%eax)
373 ; X32-NOSSE-NEXT: movl $0, 4(%eax)
374 ; X32-NOSSE-NEXT: popl %esi
375 ; X32-NOSSE-NEXT: popl %edi
376 ; X32-NOSSE-NEXT: popl %ebx
377 ; X32-NOSSE-NEXT: retl $4
381 ; X64-NEXT: movq %rsi, %rax
382 ; X64-NEXT: shrq %rax
383 ; X64-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
384 ; X64-NEXT: andq %r8, %rax
385 ; X64-NEXT: subq %rax, %rsi
386 ; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
387 ; X64-NEXT: movq %rsi, %rcx
388 ; X64-NEXT: andq %rax, %rcx
389 ; X64-NEXT: shrq $2, %rsi
390 ; X64-NEXT: andq %rax, %rsi
391 ; X64-NEXT: addq %rcx, %rsi
392 ; X64-NEXT: movq %rsi, %rcx
393 ; X64-NEXT: shrq $4, %rcx
394 ; X64-NEXT: addq %rsi, %rcx
395 ; X64-NEXT: movabsq $1085102592571150095, %r9 # imm = 0xF0F0F0F0F0F0F0F
396 ; X64-NEXT: andq %r9, %rcx
397 ; X64-NEXT: movabsq $72340172838076673, %rdx # imm = 0x101010101010101
398 ; X64-NEXT: imulq %rdx, %rcx
399 ; X64-NEXT: shrq $56, %rcx
400 ; X64-NEXT: movq %rdi, %rsi
401 ; X64-NEXT: shrq %rsi
402 ; X64-NEXT: andq %r8, %rsi
403 ; X64-NEXT: subq %rsi, %rdi
404 ; X64-NEXT: movq %rdi, %rsi
405 ; X64-NEXT: andq %rax, %rsi
406 ; X64-NEXT: shrq $2, %rdi
407 ; X64-NEXT: andq %rax, %rdi
408 ; X64-NEXT: addq %rsi, %rdi
409 ; X64-NEXT: movq %rdi, %rax
410 ; X64-NEXT: shrq $4, %rax
411 ; X64-NEXT: addq %rdi, %rax
412 ; X64-NEXT: andq %r9, %rax
413 ; X64-NEXT: imulq %rdx, %rax
414 ; X64-NEXT: shrq $56, %rax
415 ; X64-NEXT: addq %rcx, %rax
416 ; X64-NEXT: xorl %edx, %edx
419 ; X32-POPCNT-LABEL: cnt128:
420 ; X32-POPCNT: # %bb.0:
421 ; X32-POPCNT-NEXT: pushl %esi
422 ; X32-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
423 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
424 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
425 ; X32-POPCNT-NEXT: addl %ecx, %edx
426 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
427 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
428 ; X32-POPCNT-NEXT: addl %ecx, %esi
429 ; X32-POPCNT-NEXT: addl %edx, %esi
430 ; X32-POPCNT-NEXT: movl %esi, (%eax)
431 ; X32-POPCNT-NEXT: movl $0, 12(%eax)
432 ; X32-POPCNT-NEXT: movl $0, 8(%eax)
433 ; X32-POPCNT-NEXT: movl $0, 4(%eax)
434 ; X32-POPCNT-NEXT: popl %esi
435 ; X32-POPCNT-NEXT: retl $4
437 ; X64-POPCNT-LABEL: cnt128:
438 ; X64-POPCNT: # %bb.0:
439 ; X64-POPCNT-NEXT: popcntq %rsi, %rcx
440 ; X64-POPCNT-NEXT: popcntq %rdi, %rax
441 ; X64-POPCNT-NEXT: addq %rcx, %rax
442 ; X64-POPCNT-NEXT: xorl %edx, %edx
443 ; X64-POPCNT-NEXT: retq
445 ; X32-SSE2-LABEL: cnt128:
447 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
448 ; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
449 ; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
450 ; X32-SSE2-NEXT: psrlw $1, %xmm1
451 ; X32-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
452 ; X32-SSE2-NEXT: pand %xmm2, %xmm1
453 ; X32-SSE2-NEXT: psubb %xmm1, %xmm0
454 ; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
455 ; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
456 ; X32-SSE2-NEXT: pand %xmm1, %xmm3
457 ; X32-SSE2-NEXT: psrlw $2, %xmm0
458 ; X32-SSE2-NEXT: pand %xmm1, %xmm0
459 ; X32-SSE2-NEXT: paddb %xmm3, %xmm0
460 ; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
461 ; X32-SSE2-NEXT: psrlw $4, %xmm3
462 ; X32-SSE2-NEXT: paddb %xmm0, %xmm3
463 ; X32-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
464 ; X32-SSE2-NEXT: pand %xmm0, %xmm3
465 ; X32-SSE2-NEXT: pxor %xmm4, %xmm4
466 ; X32-SSE2-NEXT: psadbw %xmm4, %xmm3
467 ; X32-SSE2-NEXT: movd %xmm3, %ecx
468 ; X32-SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
469 ; X32-SSE2-NEXT: movdqa %xmm3, %xmm5
470 ; X32-SSE2-NEXT: psrlw $1, %xmm5
471 ; X32-SSE2-NEXT: pand %xmm2, %xmm5
472 ; X32-SSE2-NEXT: psubb %xmm5, %xmm3
473 ; X32-SSE2-NEXT: movdqa %xmm3, %xmm2
474 ; X32-SSE2-NEXT: pand %xmm1, %xmm2
475 ; X32-SSE2-NEXT: psrlw $2, %xmm3
476 ; X32-SSE2-NEXT: pand %xmm1, %xmm3
477 ; X32-SSE2-NEXT: paddb %xmm2, %xmm3
478 ; X32-SSE2-NEXT: movdqa %xmm3, %xmm1
479 ; X32-SSE2-NEXT: psrlw $4, %xmm1
480 ; X32-SSE2-NEXT: paddb %xmm3, %xmm1
481 ; X32-SSE2-NEXT: pand %xmm0, %xmm1
482 ; X32-SSE2-NEXT: psadbw %xmm4, %xmm1
483 ; X32-SSE2-NEXT: movd %xmm1, %edx
484 ; X32-SSE2-NEXT: addl %ecx, %edx
485 ; X32-SSE2-NEXT: movl %edx, (%eax)
486 ; X32-SSE2-NEXT: movl $0, 12(%eax)
487 ; X32-SSE2-NEXT: movl $0, 8(%eax)
488 ; X32-SSE2-NEXT: movl $0, 4(%eax)
489 ; X32-SSE2-NEXT: retl $4
491 ; X32-SSSE3-LABEL: cnt128:
492 ; X32-SSSE3: # %bb.0:
493 ; X32-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
494 ; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
495 ; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
496 ; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2
497 ; X32-SSSE3-NEXT: pand %xmm0, %xmm2
498 ; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
499 ; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4
500 ; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4
501 ; X32-SSSE3-NEXT: psrlw $4, %xmm1
502 ; X32-SSSE3-NEXT: pand %xmm0, %xmm1
503 ; X32-SSSE3-NEXT: movdqa %xmm3, %xmm2
504 ; X32-SSSE3-NEXT: pshufb %xmm1, %xmm2
505 ; X32-SSSE3-NEXT: paddb %xmm4, %xmm2
506 ; X32-SSSE3-NEXT: pxor %xmm1, %xmm1
507 ; X32-SSSE3-NEXT: psadbw %xmm1, %xmm2
508 ; X32-SSSE3-NEXT: movd %xmm2, %ecx
509 ; X32-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
510 ; X32-SSSE3-NEXT: movdqa %xmm2, %xmm4
511 ; X32-SSSE3-NEXT: pand %xmm0, %xmm4
512 ; X32-SSSE3-NEXT: movdqa %xmm3, %xmm5
513 ; X32-SSSE3-NEXT: pshufb %xmm4, %xmm5
514 ; X32-SSSE3-NEXT: psrlw $4, %xmm2
515 ; X32-SSSE3-NEXT: pand %xmm0, %xmm2
516 ; X32-SSSE3-NEXT: pshufb %xmm2, %xmm3
517 ; X32-SSSE3-NEXT: paddb %xmm5, %xmm3
518 ; X32-SSSE3-NEXT: psadbw %xmm1, %xmm3
519 ; X32-SSSE3-NEXT: movd %xmm3, %edx
520 ; X32-SSSE3-NEXT: addl %ecx, %edx
521 ; X32-SSSE3-NEXT: movl %edx, (%eax)
522 ; X32-SSSE3-NEXT: movl $0, 12(%eax)
523 ; X32-SSSE3-NEXT: movl $0, 8(%eax)
524 ; X32-SSSE3-NEXT: movl $0, 4(%eax)
525 ; X32-SSSE3-NEXT: retl $4
526 %cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
; cnt64_noimplicitfloat: same i64 @llvm.ctpop.i64 call as cnt64, but the
; function carries the noimplicitfloat attribute.
; The i686 checks for this function use the prefix shared by the no-SSE, sse2
; and ssse3 runs, and contain only the scalar 32-bit expansion (no xmm
; registers), unlike the vector sequences recorded for cnt64.
; NOTE(review): the ret and closing brace are not visible in this excerpt;
; presumably the function returns %cnt - confirm against the full file.
530 define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat {
531 ; X32-LABEL: cnt64_noimplicitfloat:
533 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
534 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
535 ; X32-NEXT: movl %ecx, %edx
536 ; X32-NEXT: shrl %edx
537 ; X32-NEXT: andl $1431655765, %edx # imm = 0x55555555
538 ; X32-NEXT: subl %edx, %ecx
539 ; X32-NEXT: movl %ecx, %edx
540 ; X32-NEXT: andl $858993459, %edx # imm = 0x33333333
541 ; X32-NEXT: shrl $2, %ecx
542 ; X32-NEXT: andl $858993459, %ecx # imm = 0x33333333
543 ; X32-NEXT: addl %edx, %ecx
544 ; X32-NEXT: movl %ecx, %edx
545 ; X32-NEXT: shrl $4, %edx
546 ; X32-NEXT: addl %ecx, %edx
547 ; X32-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
548 ; X32-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101
549 ; X32-NEXT: shrl $24, %ecx
550 ; X32-NEXT: movl %eax, %edx
551 ; X32-NEXT: shrl %edx
552 ; X32-NEXT: andl $1431655765, %edx # imm = 0x55555555
553 ; X32-NEXT: subl %edx, %eax
554 ; X32-NEXT: movl %eax, %edx
555 ; X32-NEXT: andl $858993459, %edx # imm = 0x33333333
556 ; X32-NEXT: shrl $2, %eax
557 ; X32-NEXT: andl $858993459, %eax # imm = 0x33333333
558 ; X32-NEXT: addl %edx, %eax
559 ; X32-NEXT: movl %eax, %edx
560 ; X32-NEXT: shrl $4, %edx
561 ; X32-NEXT: addl %eax, %edx
562 ; X32-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
563 ; X32-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
564 ; X32-NEXT: shrl $24, %eax
565 ; X32-NEXT: addl %ecx, %eax
566 ; X32-NEXT: xorl %edx, %edx
569 ; X64-LABEL: cnt64_noimplicitfloat:
571 ; X64-NEXT: movq %rdi, %rax
572 ; X64-NEXT: shrq %rax
573 ; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
574 ; X64-NEXT: andq %rax, %rcx
575 ; X64-NEXT: subq %rcx, %rdi
576 ; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
577 ; X64-NEXT: movq %rdi, %rcx
578 ; X64-NEXT: andq %rax, %rcx
579 ; X64-NEXT: shrq $2, %rdi
580 ; X64-NEXT: andq %rax, %rdi
581 ; X64-NEXT: addq %rcx, %rdi
582 ; X64-NEXT: movq %rdi, %rax
583 ; X64-NEXT: shrq $4, %rax
584 ; X64-NEXT: addq %rdi, %rax
585 ; X64-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
586 ; X64-NEXT: andq %rax, %rcx
587 ; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
588 ; X64-NEXT: imulq %rcx, %rax
589 ; X64-NEXT: shrq $56, %rax
592 ; X32-POPCNT-LABEL: cnt64_noimplicitfloat:
593 ; X32-POPCNT: # %bb.0:
594 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
595 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
596 ; X32-POPCNT-NEXT: addl %ecx, %eax
597 ; X32-POPCNT-NEXT: xorl %edx, %edx
598 ; X32-POPCNT-NEXT: retl
600 ; X64-POPCNT-LABEL: cnt64_noimplicitfloat:
601 ; X64-POPCNT: # %bb.0:
602 ; X64-POPCNT-NEXT: popcntq %rdi, %rax
603 ; X64-POPCNT-NEXT: retq
604 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
; cnt32_optsize: same i32 @llvm.ctpop.i32 call as cnt32, but the function is
; marked optsize.
; In the non-popcnt checks the 0x33333333 mask is loaded into a register once
; (movl $858993459) and reused by both andl instructions, where cnt32 encodes
; it as an immediate twice. With +popcnt a single popcntl is expected.
; NOTE(review): the ret and closing brace are not visible in this excerpt;
; presumably the function returns %cnt - confirm against the full file.
608 define i32 @cnt32_optsize(i32 %x) nounwind readnone optsize {
609 ; X32-LABEL: cnt32_optsize:
611 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
612 ; X32-NEXT: movl %eax, %ecx
613 ; X32-NEXT: shrl %ecx
614 ; X32-NEXT: andl $1431655765, %ecx # imm = 0x55555555
615 ; X32-NEXT: subl %ecx, %eax
616 ; X32-NEXT: movl $858993459, %ecx # imm = 0x33333333
617 ; X32-NEXT: movl %eax, %edx
618 ; X32-NEXT: andl %ecx, %edx
619 ; X32-NEXT: shrl $2, %eax
620 ; X32-NEXT: andl %ecx, %eax
621 ; X32-NEXT: addl %edx, %eax
622 ; X32-NEXT: movl %eax, %ecx
623 ; X32-NEXT: shrl $4, %ecx
624 ; X32-NEXT: addl %eax, %ecx
625 ; X32-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
626 ; X32-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
627 ; X32-NEXT: shrl $24, %eax
630 ; X64-LABEL: cnt32_optsize:
632 ; X64-NEXT: movl %edi, %eax
633 ; X64-NEXT: shrl %eax
634 ; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
635 ; X64-NEXT: subl %eax, %edi
636 ; X64-NEXT: movl $858993459, %eax # imm = 0x33333333
637 ; X64-NEXT: movl %edi, %ecx
638 ; X64-NEXT: andl %eax, %ecx
639 ; X64-NEXT: shrl $2, %edi
640 ; X64-NEXT: andl %eax, %edi
641 ; X64-NEXT: addl %ecx, %edi
642 ; X64-NEXT: movl %edi, %eax
643 ; X64-NEXT: shrl $4, %eax
644 ; X64-NEXT: addl %edi, %eax
645 ; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
646 ; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
647 ; X64-NEXT: shrl $24, %eax
650 ; X32-POPCNT-LABEL: cnt32_optsize:
651 ; X32-POPCNT: # %bb.0:
652 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
653 ; X32-POPCNT-NEXT: retl
655 ; X64-POPCNT-LABEL: cnt32_optsize:
656 ; X64-POPCNT: # %bb.0:
657 ; X64-POPCNT-NEXT: popcntl %edi, %eax
658 ; X64-POPCNT-NEXT: retq
659 %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
; cnt64_optsize: same i64 @llvm.ctpop.i64 call as cnt64, but the function is
; marked optsize.
; In the i686 no-SSE checks the 0x55555555, 0x33333333 and 0x0F0F0F0F masks
; are each loaded into a register once and shared by both 32-bit halves, which
; costs three callee-saved pushes (%ebx, %edi, %esi). The sse2/ssse3 and
; +popcnt checks show the same instruction sequences as recorded for cnt64.
; NOTE(review): the ret and closing brace are not visible in this excerpt;
; presumably the function returns %cnt - confirm against the full file.
663 define i64 @cnt64_optsize(i64 %x) nounwind readnone optsize {
664 ; X32-NOSSE-LABEL: cnt64_optsize:
665 ; X32-NOSSE: # %bb.0:
666 ; X32-NOSSE-NEXT: pushl %ebx
667 ; X32-NOSSE-NEXT: pushl %edi
668 ; X32-NOSSE-NEXT: pushl %esi
669 ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
670 ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
671 ; X32-NOSSE-NEXT: movl %ecx, %edx
672 ; X32-NOSSE-NEXT: shrl %edx
673 ; X32-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555
674 ; X32-NOSSE-NEXT: andl %esi, %edx
675 ; X32-NOSSE-NEXT: subl %edx, %ecx
676 ; X32-NOSSE-NEXT: movl $858993459, %edx # imm = 0x33333333
677 ; X32-NOSSE-NEXT: movl %ecx, %edi
678 ; X32-NOSSE-NEXT: andl %edx, %edi
679 ; X32-NOSSE-NEXT: shrl $2, %ecx
680 ; X32-NOSSE-NEXT: andl %edx, %ecx
681 ; X32-NOSSE-NEXT: addl %edi, %ecx
682 ; X32-NOSSE-NEXT: movl %ecx, %edi
683 ; X32-NOSSE-NEXT: shrl $4, %edi
684 ; X32-NOSSE-NEXT: addl %ecx, %edi
685 ; X32-NOSSE-NEXT: movl $252645135, %ecx # imm = 0xF0F0F0F
686 ; X32-NOSSE-NEXT: andl %ecx, %edi
687 ; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
688 ; X32-NOSSE-NEXT: shrl $24, %edi
689 ; X32-NOSSE-NEXT: movl %eax, %ebx
690 ; X32-NOSSE-NEXT: shrl %ebx
691 ; X32-NOSSE-NEXT: andl %esi, %ebx
692 ; X32-NOSSE-NEXT: subl %ebx, %eax
693 ; X32-NOSSE-NEXT: movl %eax, %esi
694 ; X32-NOSSE-NEXT: andl %edx, %esi
695 ; X32-NOSSE-NEXT: shrl $2, %eax
696 ; X32-NOSSE-NEXT: andl %edx, %eax
697 ; X32-NOSSE-NEXT: addl %esi, %eax
698 ; X32-NOSSE-NEXT: movl %eax, %edx
699 ; X32-NOSSE-NEXT: shrl $4, %edx
700 ; X32-NOSSE-NEXT: addl %eax, %edx
701 ; X32-NOSSE-NEXT: andl %ecx, %edx
702 ; X32-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
703 ; X32-NOSSE-NEXT: shrl $24, %eax
704 ; X32-NOSSE-NEXT: addl %edi, %eax
705 ; X32-NOSSE-NEXT: xorl %edx, %edx
706 ; X32-NOSSE-NEXT: popl %esi
707 ; X32-NOSSE-NEXT: popl %edi
708 ; X32-NOSSE-NEXT: popl %ebx
709 ; X32-NOSSE-NEXT: retl
711 ; X64-LABEL: cnt64_optsize:
713 ; X64-NEXT: movq %rdi, %rax
714 ; X64-NEXT: shrq %rax
715 ; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
716 ; X64-NEXT: andq %rax, %rcx
717 ; X64-NEXT: subq %rcx, %rdi
718 ; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
719 ; X64-NEXT: movq %rdi, %rcx
720 ; X64-NEXT: andq %rax, %rcx
721 ; X64-NEXT: shrq $2, %rdi
722 ; X64-NEXT: andq %rax, %rdi
723 ; X64-NEXT: addq %rcx, %rdi
724 ; X64-NEXT: movq %rdi, %rax
725 ; X64-NEXT: shrq $4, %rax
726 ; X64-NEXT: addq %rdi, %rax
727 ; X64-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
728 ; X64-NEXT: andq %rax, %rcx
729 ; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
730 ; X64-NEXT: imulq %rcx, %rax
731 ; X64-NEXT: shrq $56, %rax
734 ; X32-POPCNT-LABEL: cnt64_optsize:
735 ; X32-POPCNT: # %bb.0:
736 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
737 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
738 ; X32-POPCNT-NEXT: addl %ecx, %eax
739 ; X32-POPCNT-NEXT: xorl %edx, %edx
740 ; X32-POPCNT-NEXT: retl
742 ; X64-POPCNT-LABEL: cnt64_optsize:
743 ; X64-POPCNT: # %bb.0:
744 ; X64-POPCNT-NEXT: popcntq %rdi, %rax
745 ; X64-POPCNT-NEXT: retq
747 ; X32-SSE2-LABEL: cnt64_optsize:
749 ; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
750 ; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
751 ; X32-SSE2-NEXT: psrlw $1, %xmm1
752 ; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
753 ; X32-SSE2-NEXT: psubb %xmm1, %xmm0
754 ; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
755 ; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
756 ; X32-SSE2-NEXT: pand %xmm1, %xmm2
757 ; X32-SSE2-NEXT: psrlw $2, %xmm0
758 ; X32-SSE2-NEXT: pand %xmm1, %xmm0
759 ; X32-SSE2-NEXT: paddb %xmm2, %xmm0
760 ; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
761 ; X32-SSE2-NEXT: psrlw $4, %xmm1
762 ; X32-SSE2-NEXT: paddb %xmm0, %xmm1
763 ; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
764 ; X32-SSE2-NEXT: pxor %xmm0, %xmm0
765 ; X32-SSE2-NEXT: psadbw %xmm1, %xmm0
766 ; X32-SSE2-NEXT: movd %xmm0, %eax
767 ; X32-SSE2-NEXT: xorl %edx, %edx
768 ; X32-SSE2-NEXT: retl
770 ; X32-SSSE3-LABEL: cnt64_optsize:
771 ; X32-SSSE3: # %bb.0:
772 ; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
773 ; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
774 ; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2
775 ; X32-SSSE3-NEXT: pand %xmm0, %xmm2
776 ; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
777 ; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4
778 ; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4
779 ; X32-SSSE3-NEXT: psrlw $4, %xmm1
780 ; X32-SSSE3-NEXT: pand %xmm0, %xmm1
781 ; X32-SSSE3-NEXT: pshufb %xmm1, %xmm3
782 ; X32-SSSE3-NEXT: paddb %xmm4, %xmm3
783 ; X32-SSSE3-NEXT: pxor %xmm0, %xmm0
784 ; X32-SSSE3-NEXT: psadbw %xmm3, %xmm0
785 ; X32-SSSE3-NEXT: movd %xmm0, %eax
786 ; X32-SSSE3-NEXT: xorl %edx, %edx
787 ; X32-SSSE3-NEXT: retl
788 %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
792 define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
793 ; X32-NOSSE-LABEL: cnt128_optsize:
794 ; X32-NOSSE: # %bb.0:
795 ; X32-NOSSE-NEXT: pushl %ebp
796 ; X32-NOSSE-NEXT: pushl %ebx
797 ; X32-NOSSE-NEXT: pushl %edi
798 ; X32-NOSSE-NEXT: pushl %esi
799 ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
800 ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
801 ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
802 ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
803 ; X32-NOSSE-NEXT: movl %ebx, %ecx
804 ; X32-NOSSE-NEXT: shrl %ecx
805 ; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
806 ; X32-NOSSE-NEXT: andl %edi, %ecx
807 ; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
808 ; X32-NOSSE-NEXT: subl %ecx, %ebx
809 ; X32-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333
810 ; X32-NOSSE-NEXT: movl %ebx, %ebp
811 ; X32-NOSSE-NEXT: andl %ecx, %ebp
812 ; X32-NOSSE-NEXT: shrl $2, %ebx
813 ; X32-NOSSE-NEXT: andl %ecx, %ebx
814 ; X32-NOSSE-NEXT: addl %ebp, %ebx
815 ; X32-NOSSE-NEXT: movl %ebx, %ebp
816 ; X32-NOSSE-NEXT: shrl $4, %ebp
817 ; X32-NOSSE-NEXT: addl %ebx, %ebp
818 ; X32-NOSSE-NEXT: movl %eax, %ebx
819 ; X32-NOSSE-NEXT: shrl %ebx
820 ; X32-NOSSE-NEXT: andl %edi, %ebx
821 ; X32-NOSSE-NEXT: subl %ebx, %eax
822 ; X32-NOSSE-NEXT: movl %eax, %ebx
823 ; X32-NOSSE-NEXT: andl %ecx, %ebx
824 ; X32-NOSSE-NEXT: shrl $2, %eax
825 ; X32-NOSSE-NEXT: andl %ecx, %eax
826 ; X32-NOSSE-NEXT: addl %ebx, %eax
827 ; X32-NOSSE-NEXT: movl %eax, %edi
828 ; X32-NOSSE-NEXT: shrl $4, %edi
829 ; X32-NOSSE-NEXT: addl %eax, %edi
830 ; X32-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F
831 ; X32-NOSSE-NEXT: andl %ebx, %ebp
832 ; X32-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101
833 ; X32-NOSSE-NEXT: shrl $24, %eax
834 ; X32-NOSSE-NEXT: andl %ebx, %edi
835 ; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
836 ; X32-NOSSE-NEXT: shrl $24, %edi
837 ; X32-NOSSE-NEXT: addl %eax, %edi
838 ; X32-NOSSE-NEXT: movl %esi, %eax
839 ; X32-NOSSE-NEXT: shrl %eax
840 ; X32-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555
841 ; X32-NOSSE-NEXT: andl %ebp, %eax
842 ; X32-NOSSE-NEXT: subl %eax, %esi
843 ; X32-NOSSE-NEXT: movl %esi, %eax
844 ; X32-NOSSE-NEXT: andl %ecx, %eax
845 ; X32-NOSSE-NEXT: shrl $2, %esi
846 ; X32-NOSSE-NEXT: andl %ecx, %esi
847 ; X32-NOSSE-NEXT: addl %eax, %esi
848 ; X32-NOSSE-NEXT: movl %esi, %eax
849 ; X32-NOSSE-NEXT: shrl $4, %eax
850 ; X32-NOSSE-NEXT: addl %esi, %eax
851 ; X32-NOSSE-NEXT: movl %edx, %esi
852 ; X32-NOSSE-NEXT: shrl %esi
853 ; X32-NOSSE-NEXT: andl %ebp, %esi
854 ; X32-NOSSE-NEXT: subl %esi, %edx
855 ; X32-NOSSE-NEXT: movl %edx, %esi
856 ; X32-NOSSE-NEXT: andl %ecx, %esi
857 ; X32-NOSSE-NEXT: shrl $2, %edx
858 ; X32-NOSSE-NEXT: andl %ecx, %edx
859 ; X32-NOSSE-NEXT: addl %esi, %edx
860 ; X32-NOSSE-NEXT: movl %edx, %ecx
861 ; X32-NOSSE-NEXT: shrl $4, %ecx
862 ; X32-NOSSE-NEXT: addl %edx, %ecx
863 ; X32-NOSSE-NEXT: andl %ebx, %eax
864 ; X32-NOSSE-NEXT: andl %ebx, %ecx
865 ; X32-NOSSE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
866 ; X32-NOSSE-NEXT: shrl $24, %eax
867 ; X32-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101
868 ; X32-NOSSE-NEXT: shrl $24, %ecx
869 ; X32-NOSSE-NEXT: addl %eax, %ecx
870 ; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
871 ; X32-NOSSE-NEXT: addl %edi, %ecx
872 ; X32-NOSSE-NEXT: xorl %edx, %edx
873 ; X32-NOSSE-NEXT: movl %edx, 12(%eax)
874 ; X32-NOSSE-NEXT: movl %edx, 8(%eax)
875 ; X32-NOSSE-NEXT: movl %edx, 4(%eax)
876 ; X32-NOSSE-NEXT: movl %ecx, (%eax)
877 ; X32-NOSSE-NEXT: popl %esi
878 ; X32-NOSSE-NEXT: popl %edi
879 ; X32-NOSSE-NEXT: popl %ebx
880 ; X32-NOSSE-NEXT: popl %ebp
881 ; X32-NOSSE-NEXT: retl $4
883 ; X64-LABEL: cnt128_optsize:
885 ; X64-NEXT: movq %rsi, %rax
886 ; X64-NEXT: shrq %rax
887 ; X64-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
888 ; X64-NEXT: andq %r8, %rax
889 ; X64-NEXT: subq %rax, %rsi
890 ; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
891 ; X64-NEXT: movq %rsi, %rcx
892 ; X64-NEXT: andq %rax, %rcx
893 ; X64-NEXT: shrq $2, %rsi
894 ; X64-NEXT: andq %rax, %rsi
895 ; X64-NEXT: addq %rcx, %rsi
896 ; X64-NEXT: movq %rsi, %rcx
897 ; X64-NEXT: shrq $4, %rcx
898 ; X64-NEXT: addq %rsi, %rcx
899 ; X64-NEXT: movabsq $1085102592571150095, %r9 # imm = 0xF0F0F0F0F0F0F0F
900 ; X64-NEXT: andq %r9, %rcx
901 ; X64-NEXT: movabsq $72340172838076673, %rdx # imm = 0x101010101010101
902 ; X64-NEXT: imulq %rdx, %rcx
903 ; X64-NEXT: shrq $56, %rcx
904 ; X64-NEXT: movq %rdi, %rsi
905 ; X64-NEXT: shrq %rsi
906 ; X64-NEXT: andq %r8, %rsi
907 ; X64-NEXT: subq %rsi, %rdi
908 ; X64-NEXT: movq %rdi, %rsi
909 ; X64-NEXT: andq %rax, %rsi
910 ; X64-NEXT: shrq $2, %rdi
911 ; X64-NEXT: andq %rax, %rdi
912 ; X64-NEXT: addq %rsi, %rdi
913 ; X64-NEXT: movq %rdi, %rax
914 ; X64-NEXT: shrq $4, %rax
915 ; X64-NEXT: addq %rdi, %rax
916 ; X64-NEXT: andq %r9, %rax
917 ; X64-NEXT: imulq %rdx, %rax
918 ; X64-NEXT: shrq $56, %rax
919 ; X64-NEXT: addq %rcx, %rax
920 ; X64-NEXT: xorl %edx, %edx
923 ; X32-POPCNT-LABEL: cnt128_optsize:
924 ; X32-POPCNT: # %bb.0:
925 ; X32-POPCNT-NEXT: pushl %esi
926 ; X32-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
927 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
928 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
929 ; X32-POPCNT-NEXT: addl %ecx, %edx
930 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
931 ; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
932 ; X32-POPCNT-NEXT: addl %ecx, %esi
933 ; X32-POPCNT-NEXT: addl %edx, %esi
934 ; X32-POPCNT-NEXT: xorl %ecx, %ecx
935 ; X32-POPCNT-NEXT: movl %ecx, 12(%eax)
936 ; X32-POPCNT-NEXT: movl %ecx, 8(%eax)
937 ; X32-POPCNT-NEXT: movl %ecx, 4(%eax)
938 ; X32-POPCNT-NEXT: movl %esi, (%eax)
939 ; X32-POPCNT-NEXT: popl %esi
940 ; X32-POPCNT-NEXT: retl $4
942 ; X64-POPCNT-LABEL: cnt128_optsize:
943 ; X64-POPCNT: # %bb.0:
944 ; X64-POPCNT-NEXT: popcntq %rsi, %rcx
945 ; X64-POPCNT-NEXT: popcntq %rdi, %rax
946 ; X64-POPCNT-NEXT: addq %rcx, %rax
947 ; X64-POPCNT-NEXT: xorl %edx, %edx
948 ; X64-POPCNT-NEXT: retq
950 ; X32-SSE2-LABEL: cnt128_optsize:
952 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
953 ; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
954 ; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
955 ; X32-SSE2-NEXT: psrlw $1, %xmm1
956 ; X32-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
957 ; X32-SSE2-NEXT: pand %xmm2, %xmm1
958 ; X32-SSE2-NEXT: psubb %xmm1, %xmm0
959 ; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
960 ; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
961 ; X32-SSE2-NEXT: pand %xmm1, %xmm3
962 ; X32-SSE2-NEXT: psrlw $2, %xmm0
963 ; X32-SSE2-NEXT: pand %xmm1, %xmm0
964 ; X32-SSE2-NEXT: paddb %xmm3, %xmm0
965 ; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
966 ; X32-SSE2-NEXT: psrlw $4, %xmm3
967 ; X32-SSE2-NEXT: paddb %xmm0, %xmm3
968 ; X32-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
969 ; X32-SSE2-NEXT: pand %xmm0, %xmm3
970 ; X32-SSE2-NEXT: pxor %xmm4, %xmm4
971 ; X32-SSE2-NEXT: psadbw %xmm4, %xmm3
972 ; X32-SSE2-NEXT: movd %xmm3, %ecx
973 ; X32-SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
974 ; X32-SSE2-NEXT: movdqa %xmm3, %xmm5
975 ; X32-SSE2-NEXT: psrlw $1, %xmm5
976 ; X32-SSE2-NEXT: pand %xmm2, %xmm5
977 ; X32-SSE2-NEXT: psubb %xmm5, %xmm3
978 ; X32-SSE2-NEXT: movdqa %xmm3, %xmm2
979 ; X32-SSE2-NEXT: pand %xmm1, %xmm2
980 ; X32-SSE2-NEXT: psrlw $2, %xmm3
981 ; X32-SSE2-NEXT: pand %xmm1, %xmm3
982 ; X32-SSE2-NEXT: paddb %xmm2, %xmm3
983 ; X32-SSE2-NEXT: movdqa %xmm3, %xmm1
984 ; X32-SSE2-NEXT: psrlw $4, %xmm1
985 ; X32-SSE2-NEXT: paddb %xmm3, %xmm1
986 ; X32-SSE2-NEXT: pand %xmm0, %xmm1
987 ; X32-SSE2-NEXT: psadbw %xmm4, %xmm1
988 ; X32-SSE2-NEXT: movd %xmm1, %edx
989 ; X32-SSE2-NEXT: addl %ecx, %edx
990 ; X32-SSE2-NEXT: xorl %ecx, %ecx
991 ; X32-SSE2-NEXT: movl %ecx, 12(%eax)
992 ; X32-SSE2-NEXT: movl %ecx, 8(%eax)
993 ; X32-SSE2-NEXT: movl %ecx, 4(%eax)
994 ; X32-SSE2-NEXT: movl %edx, (%eax)
995 ; X32-SSE2-NEXT: retl $4
997 ; X32-SSSE3-LABEL: cnt128_optsize:
998 ; X32-SSSE3: # %bb.0:
999 ; X32-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
1000 ; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1001 ; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
1002 ; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2
1003 ; X32-SSSE3-NEXT: pand %xmm0, %xmm2
1004 ; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
1005 ; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4
1006 ; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4
1007 ; X32-SSSE3-NEXT: psrlw $4, %xmm1
1008 ; X32-SSSE3-NEXT: pand %xmm0, %xmm1
1009 ; X32-SSSE3-NEXT: movdqa %xmm3, %xmm2
1010 ; X32-SSSE3-NEXT: pshufb %xmm1, %xmm2
1011 ; X32-SSSE3-NEXT: paddb %xmm4, %xmm2
1012 ; X32-SSSE3-NEXT: pxor %xmm1, %xmm1
1013 ; X32-SSSE3-NEXT: psadbw %xmm1, %xmm2
1014 ; X32-SSSE3-NEXT: movd %xmm2, %ecx
1015 ; X32-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
1016 ; X32-SSSE3-NEXT: movdqa %xmm2, %xmm4
1017 ; X32-SSSE3-NEXT: pand %xmm0, %xmm4
1018 ; X32-SSSE3-NEXT: movdqa %xmm3, %xmm5
1019 ; X32-SSSE3-NEXT: pshufb %xmm4, %xmm5
1020 ; X32-SSSE3-NEXT: psrlw $4, %xmm2
1021 ; X32-SSSE3-NEXT: pand %xmm0, %xmm2
1022 ; X32-SSSE3-NEXT: pshufb %xmm2, %xmm3
1023 ; X32-SSSE3-NEXT: paddb %xmm5, %xmm3
1024 ; X32-SSSE3-NEXT: psadbw %xmm1, %xmm3
1025 ; X32-SSSE3-NEXT: movd %xmm3, %edx
1026 ; X32-SSSE3-NEXT: addl %ecx, %edx
1027 ; X32-SSSE3-NEXT: xorl %ecx, %ecx
1028 ; X32-SSSE3-NEXT: movl %ecx, 12(%eax)
1029 ; X32-SSSE3-NEXT: movl %ecx, 8(%eax)
1030 ; X32-SSSE3-NEXT: movl %ecx, 4(%eax)
1031 ; X32-SSSE3-NEXT: movl %edx, (%eax)
1032 ; X32-SSSE3-NEXT: retl $4
1033 %cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
; Declarations of the llvm.ctpop (population count) intrinsic, one overload per
; integer width called in this file: i8, i16, i32, i64 and i128. Each overload
; takes a single integer and returns a value of the same type; all are declared
; nounwind readnone.
1037 declare i8 @llvm.ctpop.i8(i8) nounwind readnone
1038 declare i16 @llvm.ctpop.i16(i16) nounwind readnone
1039 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
1040 declare i64 @llvm.ctpop.i64(i64) nounwind readnone
1041 declare i128 @llvm.ctpop.i128(i128) nounwind readnone