; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mattr=+popcntd < %s | FileCheck %s --check-prefix=FAST
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mattr=+slow-popcntd < %s | FileCheck %s --check-prefix=SLOW
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -mattr=+popcntd < %s | FileCheck %s --check-prefix=FAST
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -mattr=+slow-popcntd < %s | FileCheck %s --check-prefix=SLOW
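
; Count the bits of an i8 that is zero extended to i16. With +popcntd the byte is
; masked and counted with a single popcntd; with +slow-popcntd the ctpop is
; expanded into the usual shift-and-mask sequence.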
define i16 @zpop_i8_i16(i8 %x) {
; FAST-LABEL: zpop_i8_i16:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 56
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: zpop_i8_i16:
; SLOW:       # %bb.0:
; SLOW-NEXT:    clrlwi 4, 3, 24
; SLOW-NEXT:    rotlwi 3, 3, 31
; SLOW-NEXT:    andi. 3, 3, 85
; SLOW-NEXT:    sub 3, 4, 3
; SLOW-NEXT:    andi. 4, 3, 13107
; SLOW-NEXT:    rotlwi 3, 3, 30
; SLOW-NEXT:    andi. 3, 3, 13107
; SLOW-NEXT:    add 3, 4, 3
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    rlwinm 4, 3, 24, 28, 31
; SLOW-NEXT:    clrlwi 3, 3, 28
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    blr
  %z = zext i8 %x to i16
  %pop = tail call i16 @llvm.ctpop.i16(i16 %z)
  ret i16 %pop
}
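
; Same types, but the ctpop runs on the i8 and the result is zero extended to i16.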
define i16 @popz_i8_i16(i8 %x) {
; FAST-LABEL: popz_i8_i16:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 56
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: popz_i8_i16:
; SLOW:       # %bb.0:
; SLOW-NEXT:    rotlwi 4, 3, 31
; SLOW-NEXT:    andi. 4, 4, 85
; SLOW-NEXT:    sub 3, 3, 4
; SLOW-NEXT:    rlwinm 4, 3, 30, 30, 31
; SLOW-NEXT:    rlwimi 4, 3, 30, 26, 27
; SLOW-NEXT:    andi. 3, 3, 51
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    clrlwi 3, 3, 28
; SLOW-NEXT:    blr
  %pop = tail call i8 @llvm.ctpop.i8(i8 %x)
  %z = zext i8 %pop to i16
  ret i16 %z
}
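
; zext i8 -> i32 before the count; the fast path can use the word form popcntw.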
define i32 @zpop_i8_i32(i8 %x) {
; FAST-LABEL: zpop_i8_i32:
; FAST:       # %bb.0:
; FAST-NEXT:    clrlwi 3, 3, 24
; FAST-NEXT:    popcntw 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: zpop_i8_i32:
; SLOW:       # %bb.0:
; SLOW-NEXT:    clrlwi 5, 3, 24
; SLOW-NEXT:    rotlwi 3, 3, 31
; SLOW-NEXT:    andi. 3, 3, 85
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    sub 3, 5, 3
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    rotlwi 5, 3, 30
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    andis. 4, 5, 13107
; SLOW-NEXT:    andi. 5, 5, 13107
; SLOW-NEXT:    or 4, 5, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 5, 3855
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 4, 257
; SLOW-NEXT:    ori 5, 5, 3855
; SLOW-NEXT:    and 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 257
; SLOW-NEXT:    mullw 3, 3, 4
; SLOW-NEXT:    srwi 3, 3, 24
; SLOW-NEXT:    blr
  %z = zext i8 %x to i32
  %pop = tail call i32 @llvm.ctpop.i32(i32 %z)
  ret i32 %pop
}
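
; ctpop on the i8 first, then zext to i32; the fast path counts the 64-bit-masked
; byte with popcntd.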
define i32 @popz_i8_32(i8 %x) {
; FAST-LABEL: popz_i8_32:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 56
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: popz_i8_32:
; SLOW:       # %bb.0:
; SLOW-NEXT:    rotlwi 4, 3, 31
; SLOW-NEXT:    andi. 4, 4, 85
; SLOW-NEXT:    sub 3, 3, 4
; SLOW-NEXT:    rlwinm 4, 3, 30, 30, 31
; SLOW-NEXT:    rlwimi 4, 3, 30, 26, 27
; SLOW-NEXT:    andi. 3, 3, 51
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    clrlwi 3, 3, 28
; SLOW-NEXT:    blr
  %pop = tail call i8 @llvm.ctpop.i8(i8 %x)
  %z = zext i8 %pop to i32
  ret i32 %z
}
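
; zext i16 -> i32 before the count; the fast path uses popcntw on the masked halfword.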
define i32 @zpop_i16_i32(i16 %x) {
; FAST-LABEL: zpop_i16_i32:
; FAST:       # %bb.0:
; FAST-NEXT:    clrlwi 3, 3, 16
; FAST-NEXT:    popcntw 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: zpop_i16_i32:
; SLOW:       # %bb.0:
; SLOW-NEXT:    clrlwi 5, 3, 16
; SLOW-NEXT:    rotlwi 3, 3, 31
; SLOW-NEXT:    andi. 3, 3, 21845
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    sub 3, 5, 3
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    rotlwi 5, 3, 30
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    andis. 4, 5, 13107
; SLOW-NEXT:    andi. 5, 5, 13107
; SLOW-NEXT:    or 4, 5, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 5, 3855
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 4, 257
; SLOW-NEXT:    ori 5, 5, 3855
; SLOW-NEXT:    and 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 257
; SLOW-NEXT:    mullw 3, 3, 4
; SLOW-NEXT:    srwi 3, 3, 24
; SLOW-NEXT:    blr
  %z = zext i16 %x to i32
  %pop = tail call i32 @llvm.ctpop.i32(i32 %z)
  ret i32 %pop
}
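
; ctpop on the i16 first, then zext to i32; the slow expansion needs a final clrldi
; to clear the high word.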
define i32 @popz_i16_32(i16 %x) {
; FAST-LABEL: popz_i16_32:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 48
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: popz_i16_32:
; SLOW:       # %bb.0:
; SLOW-NEXT:    rotlwi 4, 3, 31
; SLOW-NEXT:    andi. 4, 4, 21845
; SLOW-NEXT:    sub 3, 3, 4
; SLOW-NEXT:    andi. 4, 3, 13107
; SLOW-NEXT:    rotlwi 3, 3, 30
; SLOW-NEXT:    andi. 3, 3, 13107
; SLOW-NEXT:    add 3, 4, 3
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    rlwinm 4, 3, 24, 28, 31
; SLOW-NEXT:    clrlwi 3, 3, 28
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    clrldi 3, 3, 32
; SLOW-NEXT:    blr
  %pop = tail call i16 @llvm.ctpop.i16(i16 %x)
  %z = zext i16 %pop to i32
  ret i32 %z
}
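
; zext i32 -> i64 before the count; the slow expansion works on the full doubleword,
; building 64-bit masks with rldimi and using mulld.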
define i64 @zpop_i32_i64(i32 %x) {
; FAST-LABEL: zpop_i32_i64:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 32
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: zpop_i32_i64:
; SLOW:       # %bb.0:
; SLOW-NEXT:    rlwinm 5, 3, 31, 1, 0
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    andis. 6, 5, 21845
; SLOW-NEXT:    andi. 5, 5, 21845
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    or 5, 5, 6
; SLOW-NEXT:    clrldi 3, 3, 32
; SLOW-NEXT:    rldimi 4, 4, 32, 0
; SLOW-NEXT:    sub 3, 3, 5
; SLOW-NEXT:    and 5, 3, 4
; SLOW-NEXT:    rotldi 3, 3, 62
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    add 3, 5, 3
; SLOW-NEXT:    lis 4, 3855
; SLOW-NEXT:    rldicl 5, 3, 60, 4
; SLOW-NEXT:    ori 4, 4, 3855
; SLOW-NEXT:    add 3, 3, 5
; SLOW-NEXT:    lis 5, 257
; SLOW-NEXT:    rldimi 4, 4, 32, 0
; SLOW-NEXT:    ori 5, 5, 257
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    rldimi 5, 5, 32, 0
; SLOW-NEXT:    mulld 3, 3, 5
; SLOW-NEXT:    rldicl 3, 3, 8, 56
; SLOW-NEXT:    blr
  %z = zext i32 %x to i64
  %pop = tail call i64 @llvm.ctpop.i64(i64 %z)
  ret i64 %pop
}
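
; ctpop on the i32 first, then zext to i64; the fast path uses popcntw and clears
; the high word afterwards.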
define i64 @popz_i32_i64(i32 %x) {
; FAST-LABEL: popz_i32_i64:
; FAST:       # %bb.0:
; FAST-NEXT:    popcntw 3, 3
; FAST-NEXT:    clrldi 3, 3, 32
; FAST-NEXT:    blr
;
; SLOW-LABEL: popz_i32_i64:
; SLOW:       # %bb.0:
; SLOW-NEXT:    rotlwi 5, 3, 31
; SLOW-NEXT:    andis. 6, 5, 21845
; SLOW-NEXT:    andi. 5, 5, 21845
; SLOW-NEXT:    or 5, 5, 6
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    sub 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    rotlwi 5, 3, 30
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    andis. 4, 5, 13107
; SLOW-NEXT:    andi. 5, 5, 13107
; SLOW-NEXT:    or 4, 5, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 5, 3855
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 4, 257
; SLOW-NEXT:    ori 5, 5, 3855
; SLOW-NEXT:    and 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 257
; SLOW-NEXT:    mullw 3, 3, 4
; SLOW-NEXT:    rlwinm 3, 3, 8, 24, 31
; SLOW-NEXT:    blr
  %pop = tail call i32 @llvm.ctpop.i32(i32 %x)
  %z = zext i32 %pop to i64
  ret i64 %z
}
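
; ctpop on the i16, zext to i64, then an 'and' that demands only a single bit of
; the count (see the inline note about SimplifyDemandedBits turning zext into aext).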
define i64 @popa_i16_i64(i16 %x) {
; FAST-LABEL: popa_i16_i64:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 48
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    rlwinm 3, 3, 0, 27, 27
; FAST-NEXT:    blr
;
; SLOW-LABEL: popa_i16_i64:
; SLOW:       # %bb.0:
; SLOW-NEXT:    rotlwi 4, 3, 31
; SLOW-NEXT:    andi. 4, 4, 21845
; SLOW-NEXT:    sub 3, 3, 4
; SLOW-NEXT:    andi. 4, 3, 13107
; SLOW-NEXT:    rotlwi 3, 3, 30
; SLOW-NEXT:    andi. 3, 3, 13107
; SLOW-NEXT:    add 3, 4, 3
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    rlwinm 4, 3, 24, 28, 31
; SLOW-NEXT:    clrlwi 3, 3, 28
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    rlwinm 3, 3, 0, 27, 27
; SLOW-NEXT:    blr
  %pop = call i16 @llvm.ctpop.i16(i16 %x)
  %z = zext i16 %pop to i64 ; SimplifyDemandedBits may turn zext (or sext) into aext
  %a = and i64 %z, 16
  ret i64 %a
}
declare i8 @llvm.ctpop.i8(i8) nounwind readnone
declare i16 @llvm.ctpop.i16(i16) nounwind readnone
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone