1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mattr=+popcntd < %s | FileCheck %s --check-prefix=FAST
3 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mattr=+slow-popcntd < %s | FileCheck %s --check-prefix=SLOW
4 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -mattr=+popcntd < %s | FileCheck %s --check-prefix=FAST
5 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -mattr=+slow-popcntd < %s | FileCheck %s --check-prefix=SLOW
; zpop_i8_i16: zero-extend the i8 argument to i16 first, then take the
; population count of the widened value.
; With the FAST prefix the checks expect a clrldi followed by one popcntd;
; with the SLOW prefix they expect the open-coded mask/add/multiply expansion.
; The check lines are autogenerated (see the note at the top of the file).
7 define i16 @zpop_i8_i16(i8 %x) {
8 ; FAST-LABEL: zpop_i8_i16:
10 ; FAST-NEXT: clrldi 3, 3, 56
11 ; FAST-NEXT: popcntd 3, 3
14 ; SLOW-LABEL: zpop_i8_i16:
16 ; SLOW-NEXT: clrlwi 5, 3, 24
17 ; SLOW-NEXT: rotlwi 3, 3, 31
18 ; SLOW-NEXT: andi. 3, 3, 85
19 ; SLOW-NEXT: lis 4, 13107
20 ; SLOW-NEXT: sub 3, 5, 3
21 ; SLOW-NEXT: ori 4, 4, 13107
22 ; SLOW-NEXT: rotlwi 5, 3, 30
23 ; SLOW-NEXT: and 3, 3, 4
24 ; SLOW-NEXT: andis. 4, 5, 13107
25 ; SLOW-NEXT: andi. 5, 5, 13107
26 ; SLOW-NEXT: or 4, 5, 4
27 ; SLOW-NEXT: add 3, 3, 4
28 ; SLOW-NEXT: lis 5, 3855
29 ; SLOW-NEXT: srwi 4, 3, 4
30 ; SLOW-NEXT: add 3, 3, 4
31 ; SLOW-NEXT: lis 4, 257
32 ; SLOW-NEXT: ori 5, 5, 3855
33 ; SLOW-NEXT: and 3, 3, 5
34 ; SLOW-NEXT: ori 4, 4, 257
35 ; SLOW-NEXT: mullw 3, 3, 4
36 ; SLOW-NEXT: srwi 3, 3, 24
38 %z = zext i8 %x to i16 ; widen before counting
39 %pop = tail call i16 @llvm.ctpop.i16(i16 %z) ; count bits of the i16 value
; popz_i8_i16: take the population count at the narrow i8 width first, then
; zero-extend the count to i16 (the reverse order of zpop_i8_i16).
; With the FAST prefix the checks expect a clrldi followed by one popcntd;
; with the SLOW prefix the expansion ends in an rlwinm instead of an srwi.
; The check lines are autogenerated (see the note at the top of the file).
43 define i16 @popz_i8_i16(i8 %x) {
44 ; FAST-LABEL: popz_i8_i16:
46 ; FAST-NEXT: clrldi 3, 3, 56
47 ; FAST-NEXT: popcntd 3, 3
50 ; SLOW-LABEL: popz_i8_i16:
52 ; SLOW-NEXT: clrlwi 5, 3, 24
53 ; SLOW-NEXT: rotlwi 3, 3, 31
54 ; SLOW-NEXT: andi. 3, 3, 85
55 ; SLOW-NEXT: lis 4, 13107
56 ; SLOW-NEXT: sub 3, 5, 3
57 ; SLOW-NEXT: ori 4, 4, 13107
58 ; SLOW-NEXT: rotlwi 5, 3, 30
59 ; SLOW-NEXT: and 3, 3, 4
60 ; SLOW-NEXT: andis. 4, 5, 13107
61 ; SLOW-NEXT: andi. 5, 5, 13107
62 ; SLOW-NEXT: or 4, 5, 4
63 ; SLOW-NEXT: add 3, 3, 4
64 ; SLOW-NEXT: lis 5, 3855
65 ; SLOW-NEXT: srwi 4, 3, 4
66 ; SLOW-NEXT: add 3, 3, 4
67 ; SLOW-NEXT: lis 4, 257
68 ; SLOW-NEXT: ori 5, 5, 3855
69 ; SLOW-NEXT: and 3, 3, 5
70 ; SLOW-NEXT: ori 4, 4, 257
71 ; SLOW-NEXT: mullw 3, 3, 4
72 ; SLOW-NEXT: rlwinm 3, 3, 8, 24, 31
74 %pop = tail call i8 @llvm.ctpop.i8(i8 %x) ; count bits at the i8 width
75 %z = zext i8 %pop to i16 ; widen the count afterwards
; zpop_i8_i32: zero-extend the i8 argument to i32 first, then take the
; population count of the widened value.
; With the FAST prefix the checks expect a clrlwi followed by one popcntw;
; with the SLOW prefix they expect the open-coded mask/add/multiply expansion.
; The check lines are autogenerated (see the note at the top of the file).
79 define i32 @zpop_i8_i32(i8 %x) {
80 ; FAST-LABEL: zpop_i8_i32:
82 ; FAST-NEXT: clrlwi 3, 3, 24
83 ; FAST-NEXT: popcntw 3, 3
86 ; SLOW-LABEL: zpop_i8_i32:
88 ; SLOW-NEXT: clrlwi 5, 3, 24
89 ; SLOW-NEXT: rotlwi 3, 3, 31
90 ; SLOW-NEXT: andi. 3, 3, 85
91 ; SLOW-NEXT: lis 4, 13107
92 ; SLOW-NEXT: sub 3, 5, 3
93 ; SLOW-NEXT: ori 4, 4, 13107
94 ; SLOW-NEXT: rotlwi 5, 3, 30
95 ; SLOW-NEXT: and 3, 3, 4
96 ; SLOW-NEXT: andis. 4, 5, 13107
97 ; SLOW-NEXT: andi. 5, 5, 13107
98 ; SLOW-NEXT: or 4, 5, 4
99 ; SLOW-NEXT: add 3, 3, 4
100 ; SLOW-NEXT: lis 5, 3855
101 ; SLOW-NEXT: srwi 4, 3, 4
102 ; SLOW-NEXT: add 3, 3, 4
103 ; SLOW-NEXT: lis 4, 257
104 ; SLOW-NEXT: ori 5, 5, 3855
105 ; SLOW-NEXT: and 3, 3, 5
106 ; SLOW-NEXT: ori 4, 4, 257
107 ; SLOW-NEXT: mullw 3, 3, 4
108 ; SLOW-NEXT: srwi 3, 3, 24
110 %z = zext i8 %x to i32 ; widen before counting
111 %pop = tail call i32 @llvm.ctpop.i32(i32 %z) ; count bits of the i32 value
; popz_i8_32: take the population count at the narrow i8 width first, then
; zero-extend the count to i32 (the reverse order of zpop_i8_i32).
; With the FAST prefix the checks expect a clrldi followed by one popcntd;
; with the SLOW prefix the expansion ends in an rlwinm instead of an srwi.
; The check lines are autogenerated (see the note at the top of the file).
115 define i32 @popz_i8_32(i8 %x) {
116 ; FAST-LABEL: popz_i8_32:
118 ; FAST-NEXT: clrldi 3, 3, 56
119 ; FAST-NEXT: popcntd 3, 3
122 ; SLOW-LABEL: popz_i8_32:
124 ; SLOW-NEXT: clrlwi 5, 3, 24
125 ; SLOW-NEXT: rotlwi 3, 3, 31
126 ; SLOW-NEXT: andi. 3, 3, 85
127 ; SLOW-NEXT: lis 4, 13107
128 ; SLOW-NEXT: sub 3, 5, 3
129 ; SLOW-NEXT: ori 4, 4, 13107
130 ; SLOW-NEXT: rotlwi 5, 3, 30
131 ; SLOW-NEXT: and 3, 3, 4
132 ; SLOW-NEXT: andis. 4, 5, 13107
133 ; SLOW-NEXT: andi. 5, 5, 13107
134 ; SLOW-NEXT: or 4, 5, 4
135 ; SLOW-NEXT: add 3, 3, 4
136 ; SLOW-NEXT: lis 5, 3855
137 ; SLOW-NEXT: srwi 4, 3, 4
138 ; SLOW-NEXT: add 3, 3, 4
139 ; SLOW-NEXT: lis 4, 257
140 ; SLOW-NEXT: ori 5, 5, 3855
141 ; SLOW-NEXT: and 3, 3, 5
142 ; SLOW-NEXT: ori 4, 4, 257
143 ; SLOW-NEXT: mullw 3, 3, 4
144 ; SLOW-NEXT: rlwinm 3, 3, 8, 24, 31
146 %pop = tail call i8 @llvm.ctpop.i8(i8 %x) ; count bits at the i8 width
147 %z = zext i8 %pop to i32 ; widen the count afterwards
; zpop_i16_i32: zero-extend the i16 argument to i32 first, then take the
; population count of the widened value.
; With the FAST prefix the checks expect a clrlwi followed by one popcntw;
; with the SLOW prefix they expect the open-coded mask/add/multiply expansion.
; The check lines are autogenerated (see the note at the top of the file).
151 define i32 @zpop_i16_i32(i16 %x) {
152 ; FAST-LABEL: zpop_i16_i32:
154 ; FAST-NEXT: clrlwi 3, 3, 16
155 ; FAST-NEXT: popcntw 3, 3
158 ; SLOW-LABEL: zpop_i16_i32:
160 ; SLOW-NEXT: clrlwi 5, 3, 16
161 ; SLOW-NEXT: rotlwi 3, 3, 31
162 ; SLOW-NEXT: andi. 3, 3, 21845
163 ; SLOW-NEXT: lis 4, 13107
164 ; SLOW-NEXT: sub 3, 5, 3
165 ; SLOW-NEXT: ori 4, 4, 13107
166 ; SLOW-NEXT: rotlwi 5, 3, 30
167 ; SLOW-NEXT: and 3, 3, 4
168 ; SLOW-NEXT: andis. 4, 5, 13107
169 ; SLOW-NEXT: andi. 5, 5, 13107
170 ; SLOW-NEXT: or 4, 5, 4
171 ; SLOW-NEXT: add 3, 3, 4
172 ; SLOW-NEXT: lis 5, 3855
173 ; SLOW-NEXT: srwi 4, 3, 4
174 ; SLOW-NEXT: add 3, 3, 4
175 ; SLOW-NEXT: lis 4, 257
176 ; SLOW-NEXT: ori 5, 5, 3855
177 ; SLOW-NEXT: and 3, 3, 5
178 ; SLOW-NEXT: ori 4, 4, 257
179 ; SLOW-NEXT: mullw 3, 3, 4
180 ; SLOW-NEXT: srwi 3, 3, 24
182 %z = zext i16 %x to i32 ; widen before counting
183 %pop = tail call i32 @llvm.ctpop.i32(i32 %z) ; count bits of the i32 value
; popz_i16_32: take the population count at the narrow i16 width first, then
; zero-extend the count to i32 (the reverse order of zpop_i16_i32).
; With the FAST prefix the checks expect a clrldi followed by one popcntd;
; with the SLOW prefix the expansion ends in an rlwinm instead of an srwi.
; The check lines are autogenerated (see the note at the top of the file).
187 define i32 @popz_i16_32(i16 %x) {
188 ; FAST-LABEL: popz_i16_32:
190 ; FAST-NEXT: clrldi 3, 3, 48
191 ; FAST-NEXT: popcntd 3, 3
194 ; SLOW-LABEL: popz_i16_32:
196 ; SLOW-NEXT: clrlwi 5, 3, 16
197 ; SLOW-NEXT: rotlwi 3, 3, 31
198 ; SLOW-NEXT: andi. 3, 3, 21845
199 ; SLOW-NEXT: lis 4, 13107
200 ; SLOW-NEXT: sub 3, 5, 3
201 ; SLOW-NEXT: ori 4, 4, 13107
202 ; SLOW-NEXT: rotlwi 5, 3, 30
203 ; SLOW-NEXT: and 3, 3, 4
204 ; SLOW-NEXT: andis. 4, 5, 13107
205 ; SLOW-NEXT: andi. 5, 5, 13107
206 ; SLOW-NEXT: or 4, 5, 4
207 ; SLOW-NEXT: add 3, 3, 4
208 ; SLOW-NEXT: lis 5, 3855
209 ; SLOW-NEXT: srwi 4, 3, 4
210 ; SLOW-NEXT: add 3, 3, 4
211 ; SLOW-NEXT: lis 4, 257
212 ; SLOW-NEXT: ori 5, 5, 3855
213 ; SLOW-NEXT: and 3, 3, 5
214 ; SLOW-NEXT: ori 4, 4, 257
215 ; SLOW-NEXT: mullw 3, 3, 4
216 ; SLOW-NEXT: rlwinm 3, 3, 8, 24, 31
218 %pop = tail call i16 @llvm.ctpop.i16(i16 %x) ; count bits at the i16 width
219 %z = zext i16 %pop to i32 ; widen the count afterwards
; zpop_i32_i64: zero-extend the i32 argument to i64 first, then take the
; population count of the widened value.
; With the FAST prefix the checks expect a clrldi followed by one popcntd;
; with the SLOW prefix they expect an expansion built from doubleword
; operations (rldimi, rotldi, mulld, rldicl).
; The check lines are autogenerated (see the note at the top of the file).
223 define i64 @zpop_i32_i64(i32 %x) {
224 ; FAST-LABEL: zpop_i32_i64:
226 ; FAST-NEXT: clrldi 3, 3, 32
227 ; FAST-NEXT: popcntd 3, 3
230 ; SLOW-LABEL: zpop_i32_i64:
232 ; SLOW-NEXT: rlwinm 5, 3, 31, 1, 0
233 ; SLOW-NEXT: lis 4, 13107
234 ; SLOW-NEXT: andis. 6, 5, 21845
235 ; SLOW-NEXT: andi. 5, 5, 21845
236 ; SLOW-NEXT: ori 4, 4, 13107
237 ; SLOW-NEXT: or 5, 5, 6
238 ; SLOW-NEXT: clrldi 3, 3, 32
239 ; SLOW-NEXT: rldimi 4, 4, 32, 0
240 ; SLOW-NEXT: sub 3, 3, 5
241 ; SLOW-NEXT: and 5, 3, 4
242 ; SLOW-NEXT: rotldi 3, 3, 62
243 ; SLOW-NEXT: and 3, 3, 4
244 ; SLOW-NEXT: add 3, 5, 3
245 ; SLOW-NEXT: lis 4, 3855
246 ; SLOW-NEXT: rldicl 5, 3, 60, 4
247 ; SLOW-NEXT: ori 4, 4, 3855
248 ; SLOW-NEXT: add 3, 3, 5
249 ; SLOW-NEXT: lis 5, 257
250 ; SLOW-NEXT: rldimi 4, 4, 32, 0
251 ; SLOW-NEXT: ori 5, 5, 257
252 ; SLOW-NEXT: and 3, 3, 4
253 ; SLOW-NEXT: rldimi 5, 5, 32, 0
254 ; SLOW-NEXT: mulld 3, 3, 5
255 ; SLOW-NEXT: rldicl 3, 3, 8, 56
257 %z = zext i32 %x to i64 ; widen before counting
258 %pop = tail call i64 @llvm.ctpop.i64(i64 %z) ; count bits of the i64 value
; popz_i32_i64: take the population count at the i32 width first, then
; zero-extend the count to i64 (the reverse order of zpop_i32_i64).
; With the FAST prefix the checks expect popcntw followed by a clrldi;
; with the SLOW prefix they expect a word-sized expansion (mullw, rlwinm)
; rather than the doubleword one checked for zpop_i32_i64.
; The check lines are autogenerated (see the note at the top of the file).
262 define i64 @popz_i32_i64(i32 %x) {
263 ; FAST-LABEL: popz_i32_i64:
265 ; FAST-NEXT: popcntw 3, 3
266 ; FAST-NEXT: clrldi 3, 3, 32
269 ; SLOW-LABEL: popz_i32_i64:
271 ; SLOW-NEXT: rotlwi 5, 3, 31
272 ; SLOW-NEXT: andis. 6, 5, 21845
273 ; SLOW-NEXT: andi. 5, 5, 21845
274 ; SLOW-NEXT: or 5, 5, 6
275 ; SLOW-NEXT: lis 4, 13107
276 ; SLOW-NEXT: sub 3, 3, 5
277 ; SLOW-NEXT: ori 4, 4, 13107
278 ; SLOW-NEXT: rotlwi 5, 3, 30
279 ; SLOW-NEXT: and 3, 3, 4
280 ; SLOW-NEXT: andis. 4, 5, 13107
281 ; SLOW-NEXT: andi. 5, 5, 13107
282 ; SLOW-NEXT: or 4, 5, 4
283 ; SLOW-NEXT: add 3, 3, 4
284 ; SLOW-NEXT: lis 5, 3855
285 ; SLOW-NEXT: srwi 4, 3, 4
286 ; SLOW-NEXT: add 3, 3, 4
287 ; SLOW-NEXT: lis 4, 257
288 ; SLOW-NEXT: ori 5, 5, 3855
289 ; SLOW-NEXT: and 3, 3, 5
290 ; SLOW-NEXT: ori 4, 4, 257
291 ; SLOW-NEXT: mullw 3, 3, 4
292 ; SLOW-NEXT: rlwinm 3, 3, 8, 24, 31
294 %pop = tail call i32 @llvm.ctpop.i32(i32 %x) ; count bits at the i32 width
295 %z = zext i32 %pop to i64 ; widen the count afterwards
; popa_i16_i64: take the population count at the i16 width, then zero-extend
; the count to i64. Unlike the popz_* tests, the call here is a plain call,
; not a tail call.
; Both check sequences end with an extra "rlwinm 3, 3, 0, 27, 27".
; NOTE(review): the instruction that consumes %z is not visible in this
; excerpt; that trailing rlwinm presumably comes from a mask applied to %z,
; which would match the existing remark about zext becoming aext. Confirm
; against the full test.
; The check lines are autogenerated (see the note at the top of the file).
299 define i64 @popa_i16_i64(i16 %x) {
300 ; FAST-LABEL: popa_i16_i64:
302 ; FAST-NEXT: clrldi 3, 3, 48
303 ; FAST-NEXT: popcntd 3, 3
304 ; FAST-NEXT: rlwinm 3, 3, 0, 27, 27
307 ; SLOW-LABEL: popa_i16_i64:
309 ; SLOW-NEXT: clrlwi 5, 3, 16
310 ; SLOW-NEXT: rotlwi 3, 3, 31
311 ; SLOW-NEXT: andi. 3, 3, 21845
312 ; SLOW-NEXT: lis 4, 13107
313 ; SLOW-NEXT: sub 3, 5, 3
314 ; SLOW-NEXT: ori 4, 4, 13107
315 ; SLOW-NEXT: rotlwi 5, 3, 30
316 ; SLOW-NEXT: and 3, 3, 4
317 ; SLOW-NEXT: andis. 4, 5, 13107
318 ; SLOW-NEXT: andi. 5, 5, 13107
319 ; SLOW-NEXT: or 4, 5, 4
320 ; SLOW-NEXT: add 3, 3, 4
321 ; SLOW-NEXT: lis 5, 3855
322 ; SLOW-NEXT: srwi 4, 3, 4
323 ; SLOW-NEXT: add 3, 3, 4
324 ; SLOW-NEXT: lis 4, 257
325 ; SLOW-NEXT: ori 5, 5, 3855
326 ; SLOW-NEXT: and 3, 3, 5
327 ; SLOW-NEXT: ori 4, 4, 257
328 ; SLOW-NEXT: mullw 3, 3, 4
329 ; SLOW-NEXT: srwi 3, 3, 24
330 ; SLOW-NEXT: rlwinm 3, 3, 0, 27, 27
332 %pop = call i16 @llvm.ctpop.i16(i16 %x)
333 %z = zext i16 %pop to i64 ; SimplifyDemandedBits may turn zext (or sext) into aext
; Declarations of the population-count intrinsic at each integer width
; (i8, i16, i32, i64) called by the test functions in this file.
338 declare i8 @llvm.ctpop.i8(i8) nounwind readnone
339 declare i16 @llvm.ctpop.i16(i16) nounwind readnone
340 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
341 declare i64 @llvm.ctpop.i64(i64) nounwind readnone