1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mattr=+popcntd < %s | FileCheck %s --check-prefixes=ANY,FAST
3 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mattr=+slow-popcntd < %s | FileCheck %s --check-prefixes=ANY,SLOW
; zpop_i8_i16: zero-extend the i8 argument to i16, then call llvm.ctpop.i16 on
; the widened value (extend first, count second).
; FAST run (-mattr=+popcntd): the checks expect the low 8 bits to be isolated
; with rlwinm (mask 24..31) and counted with a single popcntw.
; SLOW run (-mattr=+slow-popcntd): the checks expect no popcnt instruction but a
; shift/mask/add/multiply sequence instead, finishing with srwi 3, 3, 24.
5 define i16 @zpop_i8_i16(i8 %x) {
6 ; FAST-LABEL: zpop_i8_i16:
8 ; FAST-NEXT: rlwinm 3, 3, 0, 24, 31
9 ; FAST-NEXT: popcntw 3, 3
12 ; SLOW-LABEL: zpop_i8_i16:
14 ; SLOW-NEXT: clrlwi 5, 3, 24
15 ; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31
16 ; SLOW-NEXT: andi. 3, 3, 85
17 ; SLOW-NEXT: lis 4, 13107
18 ; SLOW-NEXT: subf 3, 3, 5
19 ; SLOW-NEXT: ori 4, 4, 13107
20 ; SLOW-NEXT: rotlwi 5, 3, 30
21 ; SLOW-NEXT: and 3, 3, 4
22 ; SLOW-NEXT: andis. 4, 5, 13107
23 ; SLOW-NEXT: andi. 5, 5, 13107
24 ; SLOW-NEXT: or 4, 5, 4
25 ; SLOW-NEXT: add 3, 3, 4
26 ; SLOW-NEXT: lis 5, 3855
27 ; SLOW-NEXT: srwi 4, 3, 4
28 ; SLOW-NEXT: add 3, 3, 4
29 ; SLOW-NEXT: lis 4, 257
30 ; SLOW-NEXT: ori 5, 5, 3855
31 ; SLOW-NEXT: and 3, 3, 5
32 ; SLOW-NEXT: ori 4, 4, 257
33 ; SLOW-NEXT: mullw 3, 3, 4
34 ; SLOW-NEXT: srwi 3, 3, 24
36 %z = zext i8 %x to i16
37 %pop = tail call i16 @llvm.ctpop.i16(i16 %z)
; popz_i8_i16: call llvm.ctpop.i8 on the i8 argument, then zero-extend the
; count to i16 (count first, extend second).
; FAST run (-mattr=+popcntd): the checks expect rlwinm (mask 24..31) + popcntw,
; followed by clrldi 3, 3, 32.
; SLOW run (-mattr=+slow-popcntd): the checks expect the shift/mask/add/multiply
; sequence, here finishing with rlwinm 3, 3, 8, 24, 31.
41 define i16 @popz_i8_i16(i8 %x) {
42 ; FAST-LABEL: popz_i8_i16:
44 ; FAST-NEXT: rlwinm 3, 3, 0, 24, 31
45 ; FAST-NEXT: popcntw 3, 3
46 ; FAST-NEXT: clrldi 3, 3, 32
49 ; SLOW-LABEL: popz_i8_i16:
51 ; SLOW-NEXT: clrlwi 5, 3, 24
52 ; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31
53 ; SLOW-NEXT: andi. 3, 3, 85
54 ; SLOW-NEXT: lis 4, 13107
55 ; SLOW-NEXT: subf 3, 3, 5
56 ; SLOW-NEXT: ori 4, 4, 13107
57 ; SLOW-NEXT: rotlwi 5, 3, 30
58 ; SLOW-NEXT: and 3, 3, 4
59 ; SLOW-NEXT: andis. 4, 5, 13107
60 ; SLOW-NEXT: andi. 5, 5, 13107
61 ; SLOW-NEXT: or 4, 5, 4
62 ; SLOW-NEXT: add 3, 3, 4
63 ; SLOW-NEXT: lis 5, 3855
64 ; SLOW-NEXT: srwi 4, 3, 4
65 ; SLOW-NEXT: add 3, 3, 4
66 ; SLOW-NEXT: lis 4, 257
67 ; SLOW-NEXT: ori 5, 5, 3855
68 ; SLOW-NEXT: and 3, 3, 5
69 ; SLOW-NEXT: ori 4, 4, 257
70 ; SLOW-NEXT: mullw 3, 3, 4
71 ; SLOW-NEXT: rlwinm 3, 3, 8, 24, 31
73 %pop = tail call i8 @llvm.ctpop.i8(i8 %x)
74 %z = zext i8 %pop to i16
; zpop_i8_i32: zero-extend the i8 argument to i32, then call llvm.ctpop.i32 on
; the widened value (extend first, count second).
; FAST run (-mattr=+popcntd): the checks expect rlwinm (mask 24..31) followed by
; a single popcntw.
; SLOW run (-mattr=+slow-popcntd): the checks expect the shift/mask/add/multiply
; sequence with a first mask of 85, finishing with srwi 3, 3, 24.
78 define i32 @zpop_i8_i32(i8 %x) {
79 ; FAST-LABEL: zpop_i8_i32:
81 ; FAST-NEXT: rlwinm 3, 3, 0, 24, 31
82 ; FAST-NEXT: popcntw 3, 3
85 ; SLOW-LABEL: zpop_i8_i32:
87 ; SLOW-NEXT: clrlwi 5, 3, 24
88 ; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31
89 ; SLOW-NEXT: andi. 3, 3, 85
90 ; SLOW-NEXT: lis 4, 13107
91 ; SLOW-NEXT: subf 3, 3, 5
92 ; SLOW-NEXT: ori 4, 4, 13107
93 ; SLOW-NEXT: rotlwi 5, 3, 30
94 ; SLOW-NEXT: and 3, 3, 4
95 ; SLOW-NEXT: andis. 4, 5, 13107
96 ; SLOW-NEXT: andi. 5, 5, 13107
97 ; SLOW-NEXT: or 4, 5, 4
98 ; SLOW-NEXT: add 3, 3, 4
99 ; SLOW-NEXT: lis 5, 3855
100 ; SLOW-NEXT: srwi 4, 3, 4
101 ; SLOW-NEXT: add 3, 3, 4
102 ; SLOW-NEXT: lis 4, 257
103 ; SLOW-NEXT: ori 5, 5, 3855
104 ; SLOW-NEXT: and 3, 3, 5
105 ; SLOW-NEXT: ori 4, 4, 257
106 ; SLOW-NEXT: mullw 3, 3, 4
107 ; SLOW-NEXT: srwi 3, 3, 24
109 %z = zext i8 %x to i32
110 %pop = tail call i32 @llvm.ctpop.i32(i32 %z)
; popz_i8_32: call llvm.ctpop.i8 on the i8 argument, then zero-extend the count
; to i32 (count first, extend second).
; FAST run (-mattr=+popcntd): the checks expect rlwinm (mask 24..31) + popcntw,
; followed by clrldi 3, 3, 32.
; SLOW run (-mattr=+slow-popcntd): the checks expect the shift/mask/add/multiply
; sequence, here finishing with rlwinm 3, 3, 8, 24, 31.
; NOTE(review): sibling tests spell the destination type with an `i`
; (popz_i8_i16, popz_i32_i64); `popz_i8_32` looks like a naming slip. Renaming
; would require regenerating the -LABEL checks, so confirm before changing it.
114 define i32 @popz_i8_32(i8 %x) {
115 ; FAST-LABEL: popz_i8_32:
117 ; FAST-NEXT: rlwinm 3, 3, 0, 24, 31
118 ; FAST-NEXT: popcntw 3, 3
119 ; FAST-NEXT: clrldi 3, 3, 32
122 ; SLOW-LABEL: popz_i8_32:
124 ; SLOW-NEXT: clrlwi 5, 3, 24
125 ; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31
126 ; SLOW-NEXT: andi. 3, 3, 85
127 ; SLOW-NEXT: lis 4, 13107
128 ; SLOW-NEXT: subf 3, 3, 5
129 ; SLOW-NEXT: ori 4, 4, 13107
130 ; SLOW-NEXT: rotlwi 5, 3, 30
131 ; SLOW-NEXT: and 3, 3, 4
132 ; SLOW-NEXT: andis. 4, 5, 13107
133 ; SLOW-NEXT: andi. 5, 5, 13107
134 ; SLOW-NEXT: or 4, 5, 4
135 ; SLOW-NEXT: add 3, 3, 4
136 ; SLOW-NEXT: lis 5, 3855
137 ; SLOW-NEXT: srwi 4, 3, 4
138 ; SLOW-NEXT: add 3, 3, 4
139 ; SLOW-NEXT: lis 4, 257
140 ; SLOW-NEXT: ori 5, 5, 3855
141 ; SLOW-NEXT: and 3, 3, 5
142 ; SLOW-NEXT: ori 4, 4, 257
143 ; SLOW-NEXT: mullw 3, 3, 4
144 ; SLOW-NEXT: rlwinm 3, 3, 8, 24, 31
146 %pop = tail call i8 @llvm.ctpop.i8(i8 %x)
147 %z = zext i8 %pop to i32
; zpop_i16_i32: zero-extend the i16 argument to i32, then call llvm.ctpop.i32 on
; the widened value (extend first, count second).
; FAST run (-mattr=+popcntd): the checks expect the low 16 bits to be isolated
; with rlwinm (mask 16..31) and counted with a single popcntw.
; SLOW run (-mattr=+slow-popcntd): the checks expect the shift/mask/add/multiply
; sequence with a first mask of 21845, finishing with srwi 3, 3, 24.
151 define i32 @zpop_i16_i32(i16 %x) {
152 ; FAST-LABEL: zpop_i16_i32:
154 ; FAST-NEXT: rlwinm 3, 3, 0, 16, 31
155 ; FAST-NEXT: popcntw 3, 3
158 ; SLOW-LABEL: zpop_i16_i32:
160 ; SLOW-NEXT: clrlwi 5, 3, 16
161 ; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31
162 ; SLOW-NEXT: andi. 3, 3, 21845
163 ; SLOW-NEXT: lis 4, 13107
164 ; SLOW-NEXT: subf 3, 3, 5
165 ; SLOW-NEXT: ori 4, 4, 13107
166 ; SLOW-NEXT: rotlwi 5, 3, 30
167 ; SLOW-NEXT: and 3, 3, 4
168 ; SLOW-NEXT: andis. 4, 5, 13107
169 ; SLOW-NEXT: andi. 5, 5, 13107
170 ; SLOW-NEXT: or 4, 5, 4
171 ; SLOW-NEXT: add 3, 3, 4
172 ; SLOW-NEXT: lis 5, 3855
173 ; SLOW-NEXT: srwi 4, 3, 4
174 ; SLOW-NEXT: add 3, 3, 4
175 ; SLOW-NEXT: lis 4, 257
176 ; SLOW-NEXT: ori 5, 5, 3855
177 ; SLOW-NEXT: and 3, 3, 5
178 ; SLOW-NEXT: ori 4, 4, 257
179 ; SLOW-NEXT: mullw 3, 3, 4
180 ; SLOW-NEXT: srwi 3, 3, 24
182 %z = zext i16 %x to i32
183 %pop = tail call i32 @llvm.ctpop.i32(i32 %z)
; popz_i16_32: call llvm.ctpop.i16 on the i16 argument, then zero-extend the
; count to i32 (count first, extend second).
; FAST run (-mattr=+popcntd): the checks expect rlwinm (mask 16..31) + popcntw,
; followed by clrldi 3, 3, 32.
; SLOW run (-mattr=+slow-popcntd): the checks expect the shift/mask/add/multiply
; sequence with a first mask of 21845, finishing with rlwinm 3, 3, 8, 24, 31.
; NOTE(review): sibling tests spell the destination type with an `i`
; (popz_i8_i16, popz_i32_i64); `popz_i16_32` looks like a naming slip. Renaming
; would require regenerating the -LABEL checks, so confirm before changing it.
187 define i32 @popz_i16_32(i16 %x) {
188 ; FAST-LABEL: popz_i16_32:
190 ; FAST-NEXT: rlwinm 3, 3, 0, 16, 31
191 ; FAST-NEXT: popcntw 3, 3
192 ; FAST-NEXT: clrldi 3, 3, 32
195 ; SLOW-LABEL: popz_i16_32:
197 ; SLOW-NEXT: clrlwi 5, 3, 16
198 ; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31
199 ; SLOW-NEXT: andi. 3, 3, 21845
200 ; SLOW-NEXT: lis 4, 13107
201 ; SLOW-NEXT: subf 3, 3, 5
202 ; SLOW-NEXT: ori 4, 4, 13107
203 ; SLOW-NEXT: rotlwi 5, 3, 30
204 ; SLOW-NEXT: and 3, 3, 4
205 ; SLOW-NEXT: andis. 4, 5, 13107
206 ; SLOW-NEXT: andi. 5, 5, 13107
207 ; SLOW-NEXT: or 4, 5, 4
208 ; SLOW-NEXT: add 3, 3, 4
209 ; SLOW-NEXT: lis 5, 3855
210 ; SLOW-NEXT: srwi 4, 3, 4
211 ; SLOW-NEXT: add 3, 3, 4
212 ; SLOW-NEXT: lis 4, 257
213 ; SLOW-NEXT: ori 5, 5, 3855
214 ; SLOW-NEXT: and 3, 3, 5
215 ; SLOW-NEXT: ori 4, 4, 257
216 ; SLOW-NEXT: mullw 3, 3, 4
217 ; SLOW-NEXT: rlwinm 3, 3, 8, 24, 31
219 %pop = tail call i16 @llvm.ctpop.i16(i16 %x)
220 %z = zext i16 %pop to i32
; zpop_i32_i64: zero-extend the i32 argument to i64, then call llvm.ctpop.i64 on
; the widened value (extend first, count second).
; FAST run (-mattr=+popcntd): the checks expect clrldi 3, 3, 32 followed by a
; single popcntd.
; SLOW run (-mattr=+slow-popcntd): the checks expect a sequence built from
; doubleword instructions (rldimi, rotldi, rldicl, mulld), finishing with
; rldicl 3, 3, 8, 56.
224 define i64 @zpop_i32_i64(i32 %x) {
225 ; FAST-LABEL: zpop_i32_i64:
227 ; FAST-NEXT: clrldi 3, 3, 32
228 ; FAST-NEXT: popcntd 3, 3
231 ; SLOW-LABEL: zpop_i32_i64:
233 ; SLOW-NEXT: rlwinm 5, 3, 31, 1, 0
234 ; SLOW-NEXT: lis 4, 13107
235 ; SLOW-NEXT: andis. 6, 5, 21845
236 ; SLOW-NEXT: andi. 5, 5, 21845
237 ; SLOW-NEXT: ori 4, 4, 13107
238 ; SLOW-NEXT: or 5, 5, 6
239 ; SLOW-NEXT: clrldi 3, 3, 32
240 ; SLOW-NEXT: rldimi 4, 4, 32, 0
241 ; SLOW-NEXT: sub 3, 3, 5
242 ; SLOW-NEXT: and 5, 3, 4
243 ; SLOW-NEXT: rotldi 3, 3, 62
244 ; SLOW-NEXT: and 3, 3, 4
245 ; SLOW-NEXT: add 3, 5, 3
246 ; SLOW-NEXT: lis 4, 3855
247 ; SLOW-NEXT: rldicl 5, 3, 60, 4
248 ; SLOW-NEXT: ori 4, 4, 3855
249 ; SLOW-NEXT: add 3, 3, 5
250 ; SLOW-NEXT: lis 5, 257
251 ; SLOW-NEXT: rldimi 4, 4, 32, 0
252 ; SLOW-NEXT: ori 5, 5, 257
253 ; SLOW-NEXT: and 3, 3, 4
254 ; SLOW-NEXT: rldimi 5, 5, 32, 0
255 ; SLOW-NEXT: mulld 3, 3, 5
256 ; SLOW-NEXT: rldicl 3, 3, 8, 56
258 %z = zext i32 %x to i64
259 %pop = tail call i64 @llvm.ctpop.i64(i64 %z)
; popz_i32_i64: call llvm.ctpop.i32 on the i32 argument, then zero-extend the
; count to i64 (count first, extend second).
; FAST run (-mattr=+popcntd): the checks expect popcntw followed by
; clrldi 3, 3, 32; no masking instruction precedes the popcntw here.
; SLOW run (-mattr=+slow-popcntd): the checks expect a sequence built from word
; instructions (rotlwi, srwi, mullw), finishing with rlwinm 3, 3, 8, 24, 31.
263 define i64 @popz_i32_i64(i32 %x) {
264 ; FAST-LABEL: popz_i32_i64:
266 ; FAST-NEXT: popcntw 3, 3
267 ; FAST-NEXT: clrldi 3, 3, 32
270 ; SLOW-LABEL: popz_i32_i64:
272 ; SLOW-NEXT: rotlwi 5, 3, 31
273 ; SLOW-NEXT: andis. 6, 5, 21845
274 ; SLOW-NEXT: andi. 5, 5, 21845
275 ; SLOW-NEXT: or 5, 5, 6
276 ; SLOW-NEXT: lis 4, 13107
277 ; SLOW-NEXT: subf 3, 5, 3
278 ; SLOW-NEXT: ori 4, 4, 13107
279 ; SLOW-NEXT: rotlwi 5, 3, 30
280 ; SLOW-NEXT: and 3, 3, 4
281 ; SLOW-NEXT: andis. 4, 5, 13107
282 ; SLOW-NEXT: andi. 5, 5, 13107
283 ; SLOW-NEXT: or 4, 5, 4
284 ; SLOW-NEXT: add 3, 3, 4
285 ; SLOW-NEXT: lis 5, 3855
286 ; SLOW-NEXT: srwi 4, 3, 4
287 ; SLOW-NEXT: add 3, 3, 4
288 ; SLOW-NEXT: lis 4, 257
289 ; SLOW-NEXT: ori 5, 5, 3855
290 ; SLOW-NEXT: and 3, 3, 5
291 ; SLOW-NEXT: ori 4, 4, 257
292 ; SLOW-NEXT: mullw 3, 3, 4
293 ; SLOW-NEXT: rlwinm 3, 3, 8, 24, 31
295 %pop = tail call i32 @llvm.ctpop.i32(i32 %x)
296 %z = zext i32 %pop to i64
; Declarations of the llvm.ctpop intrinsic at each width (i8, i16, i32, i64)
; called by the test functions in this file; all are marked nounwind readnone.
300 declare i8 @llvm.ctpop.i8(i8) nounwind readnone
301 declare i16 @llvm.ctpop.i16(i16) nounwind readnone
302 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
303 declare i64 @llvm.ctpop.i64(i64) nounwind readnone