1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
3 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
4 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
5 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
6 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
7 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
8 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
9 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
; Signed remainder of a <4 x i16> vector by four different constants that are
; not powers of two (95, -124, 98, -1003).
; In the assertions below, each lane is moved to a GPR and sign-extended with
; extsh, multiplied by a per-divisor constant with mulhw, adjusted with
; srwi/srawi/add, then multiplied back by the divisor (mulli) and subtracted
; from the lane value. No divide instruction appears in the expected output of
; any of the four llc invocations (pwr8/pwr9, little- and big-endian).
11 define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
12 ; P9LE-LABEL: fold_srem_vec_1:
15 ; P9LE-NEXT: lis r4, -21386
16 ; P9LE-NEXT: vextuhrx r3, r3, v2
17 ; P9LE-NEXT: ori r4, r4, 37253
18 ; P9LE-NEXT: extsh r3, r3
19 ; P9LE-NEXT: mulhw r4, r3, r4
20 ; P9LE-NEXT: add r4, r4, r3
21 ; P9LE-NEXT: srwi r5, r4, 31
22 ; P9LE-NEXT: srawi r4, r4, 6
23 ; P9LE-NEXT: add r4, r4, r5
24 ; P9LE-NEXT: mulli r4, r4, 95
25 ; P9LE-NEXT: sub r3, r3, r4
26 ; P9LE-NEXT: lis r4, 31710
27 ; P9LE-NEXT: mtvsrd v3, r3
29 ; P9LE-NEXT: ori r4, r4, 63421
30 ; P9LE-NEXT: vextuhrx r3, r3, v2
31 ; P9LE-NEXT: extsh r3, r3
32 ; P9LE-NEXT: mulhw r4, r3, r4
33 ; P9LE-NEXT: sub r4, r4, r3
34 ; P9LE-NEXT: srwi r5, r4, 31
35 ; P9LE-NEXT: srawi r4, r4, 6
36 ; P9LE-NEXT: add r4, r4, r5
37 ; P9LE-NEXT: mulli r4, r4, -124
38 ; P9LE-NEXT: sub r3, r3, r4
39 ; P9LE-NEXT: lis r4, 21399
40 ; P9LE-NEXT: mtvsrd v4, r3
42 ; P9LE-NEXT: ori r4, r4, 33437
43 ; P9LE-NEXT: vextuhrx r3, r3, v2
44 ; P9LE-NEXT: vmrghh v3, v4, v3
45 ; P9LE-NEXT: extsh r3, r3
46 ; P9LE-NEXT: mulhw r4, r3, r4
47 ; P9LE-NEXT: srwi r5, r4, 31
48 ; P9LE-NEXT: srawi r4, r4, 5
49 ; P9LE-NEXT: add r4, r4, r5
50 ; P9LE-NEXT: mulli r4, r4, 98
51 ; P9LE-NEXT: sub r3, r3, r4
52 ; P9LE-NEXT: lis r4, -16728
53 ; P9LE-NEXT: mtvsrd v4, r3
55 ; P9LE-NEXT: ori r4, r4, 63249
56 ; P9LE-NEXT: vextuhrx r3, r3, v2
57 ; P9LE-NEXT: extsh r3, r3
58 ; P9LE-NEXT: mulhw r4, r3, r4
59 ; P9LE-NEXT: srwi r5, r4, 31
60 ; P9LE-NEXT: srawi r4, r4, 8
61 ; P9LE-NEXT: add r4, r4, r5
62 ; P9LE-NEXT: mulli r4, r4, -1003
63 ; P9LE-NEXT: sub r3, r3, r4
64 ; P9LE-NEXT: mtvsrd v2, r3
65 ; P9LE-NEXT: vmrghh v2, v2, v4
66 ; P9LE-NEXT: vmrglw v2, v2, v3
69 ; P9BE-LABEL: fold_srem_vec_1:
72 ; P9BE-NEXT: lis r4, 31710
73 ; P9BE-NEXT: vextuhlx r3, r3, v2
74 ; P9BE-NEXT: ori r4, r4, 63421
75 ; P9BE-NEXT: extsh r3, r3
76 ; P9BE-NEXT: mulhw r4, r3, r4
77 ; P9BE-NEXT: sub r4, r4, r3
78 ; P9BE-NEXT: srwi r5, r4, 31
79 ; P9BE-NEXT: srawi r4, r4, 6
80 ; P9BE-NEXT: add r4, r4, r5
81 ; P9BE-NEXT: mulli r4, r4, -124
82 ; P9BE-NEXT: sub r3, r3, r4
83 ; P9BE-NEXT: lis r4, -21386
84 ; P9BE-NEXT: mtvsrwz v3, r3
86 ; P9BE-NEXT: ori r4, r4, 37253
87 ; P9BE-NEXT: vextuhlx r3, r3, v2
88 ; P9BE-NEXT: extsh r3, r3
89 ; P9BE-NEXT: mulhw r4, r3, r4
90 ; P9BE-NEXT: add r4, r4, r3
91 ; P9BE-NEXT: srwi r5, r4, 31
92 ; P9BE-NEXT: srawi r4, r4, 6
93 ; P9BE-NEXT: add r4, r4, r5
94 ; P9BE-NEXT: mulli r4, r4, 95
95 ; P9BE-NEXT: sub r3, r3, r4
96 ; P9BE-NEXT: lis r4, -16728
97 ; P9BE-NEXT: mtvsrwz v4, r3
98 ; P9BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
99 ; P9BE-NEXT: ori r4, r4, 63249
100 ; P9BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
101 ; P9BE-NEXT: lxv v5, 0(r3)
102 ; P9BE-NEXT: li r3, 6
103 ; P9BE-NEXT: vextuhlx r3, r3, v2
104 ; P9BE-NEXT: extsh r3, r3
105 ; P9BE-NEXT: vperm v3, v4, v3, v5
106 ; P9BE-NEXT: mulhw r4, r3, r4
107 ; P9BE-NEXT: srwi r5, r4, 31
108 ; P9BE-NEXT: srawi r4, r4, 8
109 ; P9BE-NEXT: add r4, r4, r5
110 ; P9BE-NEXT: mulli r4, r4, -1003
111 ; P9BE-NEXT: sub r3, r3, r4
112 ; P9BE-NEXT: lis r4, 21399
113 ; P9BE-NEXT: mtvsrwz v4, r3
114 ; P9BE-NEXT: li r3, 4
115 ; P9BE-NEXT: ori r4, r4, 33437
116 ; P9BE-NEXT: vextuhlx r3, r3, v2
117 ; P9BE-NEXT: extsh r3, r3
118 ; P9BE-NEXT: mulhw r4, r3, r4
119 ; P9BE-NEXT: srwi r5, r4, 31
120 ; P9BE-NEXT: srawi r4, r4, 5
121 ; P9BE-NEXT: add r4, r4, r5
122 ; P9BE-NEXT: mulli r4, r4, 98
123 ; P9BE-NEXT: sub r3, r3, r4
124 ; P9BE-NEXT: mtvsrwz v2, r3
125 ; P9BE-NEXT: vperm v2, v2, v4, v5
126 ; P9BE-NEXT: vmrghw v2, v3, v2
129 ; P8LE-LABEL: fold_srem_vec_1:
131 ; P8LE-NEXT: xxswapd vs0, v2
132 ; P8LE-NEXT: lis r3, 21399
133 ; P8LE-NEXT: lis r8, -16728
134 ; P8LE-NEXT: lis r9, -21386
135 ; P8LE-NEXT: lis r10, 31710
136 ; P8LE-NEXT: ori r3, r3, 33437
137 ; P8LE-NEXT: ori r8, r8, 63249
138 ; P8LE-NEXT: ori r9, r9, 37253
139 ; P8LE-NEXT: ori r10, r10, 63421
140 ; P8LE-NEXT: mffprd r4, f0
141 ; P8LE-NEXT: rldicl r5, r4, 32, 48
142 ; P8LE-NEXT: rldicl r6, r4, 16, 48
143 ; P8LE-NEXT: clrldi r7, r4, 48
144 ; P8LE-NEXT: extsh r5, r5
145 ; P8LE-NEXT: extsh r6, r6
146 ; P8LE-NEXT: rldicl r4, r4, 48, 48
147 ; P8LE-NEXT: extsh r7, r7
148 ; P8LE-NEXT: mulhw r3, r5, r3
149 ; P8LE-NEXT: extsh r4, r4
150 ; P8LE-NEXT: mulhw r8, r6, r8
151 ; P8LE-NEXT: mulhw r9, r7, r9
152 ; P8LE-NEXT: mulhw r10, r4, r10
153 ; P8LE-NEXT: srwi r11, r3, 31
154 ; P8LE-NEXT: srawi r3, r3, 5
155 ; P8LE-NEXT: add r3, r3, r11
156 ; P8LE-NEXT: srwi r11, r8, 31
157 ; P8LE-NEXT: add r9, r9, r7
158 ; P8LE-NEXT: srawi r8, r8, 8
159 ; P8LE-NEXT: sub r10, r10, r4
160 ; P8LE-NEXT: add r8, r8, r11
161 ; P8LE-NEXT: srwi r11, r9, 31
162 ; P8LE-NEXT: srawi r9, r9, 6
163 ; P8LE-NEXT: mulli r3, r3, 98
164 ; P8LE-NEXT: add r9, r9, r11
165 ; P8LE-NEXT: srwi r11, r10, 31
166 ; P8LE-NEXT: srawi r10, r10, 6
167 ; P8LE-NEXT: mulli r8, r8, -1003
168 ; P8LE-NEXT: add r10, r10, r11
169 ; P8LE-NEXT: mulli r9, r9, 95
170 ; P8LE-NEXT: mulli r10, r10, -124
171 ; P8LE-NEXT: sub r3, r5, r3
172 ; P8LE-NEXT: mtvsrd v2, r3
173 ; P8LE-NEXT: sub r5, r6, r8
174 ; P8LE-NEXT: sub r3, r7, r9
175 ; P8LE-NEXT: mtvsrd v3, r5
176 ; P8LE-NEXT: sub r4, r4, r10
177 ; P8LE-NEXT: mtvsrd v4, r3
178 ; P8LE-NEXT: mtvsrd v5, r4
179 ; P8LE-NEXT: vmrghh v2, v3, v2
180 ; P8LE-NEXT: vmrghh v3, v5, v4
181 ; P8LE-NEXT: vmrglw v2, v2, v3
184 ; P8BE-LABEL: fold_srem_vec_1:
186 ; P8BE-NEXT: mfvsrd r4, v2
187 ; P8BE-NEXT: lis r3, -16728
188 ; P8BE-NEXT: lis r8, 21399
189 ; P8BE-NEXT: lis r9, 31710
190 ; P8BE-NEXT: lis r10, -21386
191 ; P8BE-NEXT: ori r3, r3, 63249
192 ; P8BE-NEXT: ori r8, r8, 33437
193 ; P8BE-NEXT: ori r9, r9, 63421
194 ; P8BE-NEXT: ori r10, r10, 37253
195 ; P8BE-NEXT: clrldi r5, r4, 48
196 ; P8BE-NEXT: rldicl r6, r4, 48, 48
197 ; P8BE-NEXT: rldicl r7, r4, 32, 48
198 ; P8BE-NEXT: extsh r5, r5
199 ; P8BE-NEXT: extsh r6, r6
200 ; P8BE-NEXT: rldicl r4, r4, 16, 48
201 ; P8BE-NEXT: extsh r7, r7
202 ; P8BE-NEXT: mulhw r3, r5, r3
203 ; P8BE-NEXT: extsh r4, r4
204 ; P8BE-NEXT: mulhw r8, r6, r8
205 ; P8BE-NEXT: mulhw r9, r7, r9
206 ; P8BE-NEXT: mulhw r10, r4, r10
207 ; P8BE-NEXT: srwi r11, r3, 31
208 ; P8BE-NEXT: srawi r3, r3, 8
209 ; P8BE-NEXT: add r3, r3, r11
210 ; P8BE-NEXT: srwi r11, r8, 31
211 ; P8BE-NEXT: sub r9, r9, r7
212 ; P8BE-NEXT: srawi r8, r8, 5
213 ; P8BE-NEXT: add r10, r10, r4
214 ; P8BE-NEXT: add r8, r8, r11
215 ; P8BE-NEXT: srwi r11, r9, 31
216 ; P8BE-NEXT: srawi r9, r9, 6
217 ; P8BE-NEXT: mulli r3, r3, -1003
218 ; P8BE-NEXT: add r9, r9, r11
219 ; P8BE-NEXT: srwi r11, r10, 31
220 ; P8BE-NEXT: srawi r10, r10, 6
221 ; P8BE-NEXT: mulli r8, r8, 98
222 ; P8BE-NEXT: add r10, r10, r11
223 ; P8BE-NEXT: mulli r9, r9, -124
224 ; P8BE-NEXT: mulli r10, r10, 95
225 ; P8BE-NEXT: sub r3, r5, r3
226 ; P8BE-NEXT: addis r5, r2, .LCPI0_0@toc@ha
227 ; P8BE-NEXT: mtvsrwz v2, r3
228 ; P8BE-NEXT: addi r3, r5, .LCPI0_0@toc@l
229 ; P8BE-NEXT: sub r6, r6, r8
230 ; P8BE-NEXT: lxvw4x v3, 0, r3
231 ; P8BE-NEXT: sub r3, r7, r9
232 ; P8BE-NEXT: mtvsrwz v4, r6
233 ; P8BE-NEXT: sub r4, r4, r10
234 ; P8BE-NEXT: mtvsrwz v5, r3
235 ; P8BE-NEXT: mtvsrwz v0, r4
236 ; P8BE-NEXT: vperm v2, v4, v2, v3
237 ; P8BE-NEXT: vperm v3, v0, v5, v3
238 ; P8BE-NEXT: vmrghw v2, v3, v2
; IR under test: every lane has its own constant divisor.
240 %1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
; Signed remainder of a <4 x i16> vector by the same constant (95) in every
; lane.
; In the assertions below, the multiplier constant is built once (lis -21386 /
; ori 37253) and reused by the mulhw of all four lanes; each lane then goes
; through the same add, srwi/srawi, add, mulli 95, sub sequence.
244 define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
245 ; P9LE-LABEL: fold_srem_vec_2:
247 ; P9LE-NEXT: li r3, 0
248 ; P9LE-NEXT: lis r4, -21386
249 ; P9LE-NEXT: vextuhrx r3, r3, v2
250 ; P9LE-NEXT: ori r4, r4, 37253
251 ; P9LE-NEXT: extsh r3, r3
252 ; P9LE-NEXT: mulhw r5, r3, r4
253 ; P9LE-NEXT: add r5, r5, r3
254 ; P9LE-NEXT: srwi r6, r5, 31
255 ; P9LE-NEXT: srawi r5, r5, 6
256 ; P9LE-NEXT: add r5, r5, r6
257 ; P9LE-NEXT: mulli r5, r5, 95
258 ; P9LE-NEXT: sub r3, r3, r5
259 ; P9LE-NEXT: mtvsrd v3, r3
260 ; P9LE-NEXT: li r3, 2
261 ; P9LE-NEXT: vextuhrx r3, r3, v2
262 ; P9LE-NEXT: extsh r3, r3
263 ; P9LE-NEXT: mulhw r5, r3, r4
264 ; P9LE-NEXT: add r5, r5, r3
265 ; P9LE-NEXT: srwi r6, r5, 31
266 ; P9LE-NEXT: srawi r5, r5, 6
267 ; P9LE-NEXT: add r5, r5, r6
268 ; P9LE-NEXT: mulli r5, r5, 95
269 ; P9LE-NEXT: sub r3, r3, r5
270 ; P9LE-NEXT: mtvsrd v4, r3
271 ; P9LE-NEXT: li r3, 4
272 ; P9LE-NEXT: vextuhrx r3, r3, v2
273 ; P9LE-NEXT: vmrghh v3, v4, v3
274 ; P9LE-NEXT: extsh r3, r3
275 ; P9LE-NEXT: mulhw r5, r3, r4
276 ; P9LE-NEXT: add r5, r5, r3
277 ; P9LE-NEXT: srwi r6, r5, 31
278 ; P9LE-NEXT: srawi r5, r5, 6
279 ; P9LE-NEXT: add r5, r5, r6
280 ; P9LE-NEXT: mulli r5, r5, 95
281 ; P9LE-NEXT: sub r3, r3, r5
282 ; P9LE-NEXT: mtvsrd v4, r3
283 ; P9LE-NEXT: li r3, 6
284 ; P9LE-NEXT: vextuhrx r3, r3, v2
285 ; P9LE-NEXT: extsh r3, r3
286 ; P9LE-NEXT: mulhw r4, r3, r4
287 ; P9LE-NEXT: add r4, r4, r3
288 ; P9LE-NEXT: srwi r5, r4, 31
289 ; P9LE-NEXT: srawi r4, r4, 6
290 ; P9LE-NEXT: add r4, r4, r5
291 ; P9LE-NEXT: mulli r4, r4, 95
292 ; P9LE-NEXT: sub r3, r3, r4
293 ; P9LE-NEXT: mtvsrd v2, r3
294 ; P9LE-NEXT: vmrghh v2, v2, v4
295 ; P9LE-NEXT: vmrglw v2, v2, v3
298 ; P9BE-LABEL: fold_srem_vec_2:
300 ; P9BE-NEXT: li r3, 6
301 ; P9BE-NEXT: lis r4, -21386
302 ; P9BE-NEXT: vextuhlx r3, r3, v2
303 ; P9BE-NEXT: ori r4, r4, 37253
304 ; P9BE-NEXT: extsh r3, r3
305 ; P9BE-NEXT: mulhw r5, r3, r4
306 ; P9BE-NEXT: add r5, r5, r3
307 ; P9BE-NEXT: srwi r6, r5, 31
308 ; P9BE-NEXT: srawi r5, r5, 6
309 ; P9BE-NEXT: add r5, r5, r6
310 ; P9BE-NEXT: mulli r5, r5, 95
311 ; P9BE-NEXT: sub r3, r3, r5
312 ; P9BE-NEXT: mtvsrwz v3, r3
313 ; P9BE-NEXT: li r3, 4
314 ; P9BE-NEXT: vextuhlx r3, r3, v2
315 ; P9BE-NEXT: extsh r3, r3
316 ; P9BE-NEXT: mulhw r5, r3, r4
317 ; P9BE-NEXT: add r5, r5, r3
318 ; P9BE-NEXT: srwi r6, r5, 31
319 ; P9BE-NEXT: srawi r5, r5, 6
320 ; P9BE-NEXT: add r5, r5, r6
321 ; P9BE-NEXT: mulli r5, r5, 95
322 ; P9BE-NEXT: sub r3, r3, r5
323 ; P9BE-NEXT: mtvsrwz v4, r3
324 ; P9BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
325 ; P9BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
326 ; P9BE-NEXT: lxv v5, 0(r3)
327 ; P9BE-NEXT: li r3, 2
328 ; P9BE-NEXT: vextuhlx r3, r3, v2
329 ; P9BE-NEXT: extsh r3, r3
330 ; P9BE-NEXT: vperm v3, v4, v3, v5
331 ; P9BE-NEXT: mulhw r5, r3, r4
332 ; P9BE-NEXT: add r5, r5, r3
333 ; P9BE-NEXT: srwi r6, r5, 31
334 ; P9BE-NEXT: srawi r5, r5, 6
335 ; P9BE-NEXT: add r5, r5, r6
336 ; P9BE-NEXT: mulli r5, r5, 95
337 ; P9BE-NEXT: sub r3, r3, r5
338 ; P9BE-NEXT: mtvsrwz v4, r3
339 ; P9BE-NEXT: li r3, 0
340 ; P9BE-NEXT: vextuhlx r3, r3, v2
341 ; P9BE-NEXT: extsh r3, r3
342 ; P9BE-NEXT: mulhw r4, r3, r4
343 ; P9BE-NEXT: add r4, r4, r3
344 ; P9BE-NEXT: srwi r5, r4, 31
345 ; P9BE-NEXT: srawi r4, r4, 6
346 ; P9BE-NEXT: add r4, r4, r5
347 ; P9BE-NEXT: mulli r4, r4, 95
348 ; P9BE-NEXT: sub r3, r3, r4
349 ; P9BE-NEXT: mtvsrwz v2, r3
350 ; P9BE-NEXT: vperm v2, v2, v4, v5
351 ; P9BE-NEXT: vmrghw v2, v2, v3
354 ; P8LE-LABEL: fold_srem_vec_2:
356 ; P8LE-NEXT: xxswapd vs0, v2
357 ; P8LE-NEXT: lis r3, -21386
358 ; P8LE-NEXT: ori r3, r3, 37253
359 ; P8LE-NEXT: mffprd r4, f0
360 ; P8LE-NEXT: clrldi r5, r4, 48
361 ; P8LE-NEXT: rldicl r6, r4, 48, 48
362 ; P8LE-NEXT: extsh r5, r5
363 ; P8LE-NEXT: rldicl r7, r4, 32, 48
364 ; P8LE-NEXT: extsh r6, r6
365 ; P8LE-NEXT: mulhw r8, r5, r3
366 ; P8LE-NEXT: rldicl r4, r4, 16, 48
367 ; P8LE-NEXT: extsh r7, r7
368 ; P8LE-NEXT: mulhw r9, r6, r3
369 ; P8LE-NEXT: extsh r4, r4
370 ; P8LE-NEXT: mulhw r10, r7, r3
371 ; P8LE-NEXT: mulhw r3, r4, r3
372 ; P8LE-NEXT: add r8, r8, r5
373 ; P8LE-NEXT: add r9, r9, r6
374 ; P8LE-NEXT: srwi r11, r8, 31
375 ; P8LE-NEXT: srawi r8, r8, 6
376 ; P8LE-NEXT: add r10, r10, r7
377 ; P8LE-NEXT: add r3, r3, r4
378 ; P8LE-NEXT: add r8, r8, r11
379 ; P8LE-NEXT: srwi r11, r9, 31
380 ; P8LE-NEXT: srawi r9, r9, 6
381 ; P8LE-NEXT: mulli r8, r8, 95
382 ; P8LE-NEXT: add r9, r9, r11
383 ; P8LE-NEXT: srwi r11, r10, 31
384 ; P8LE-NEXT: srawi r10, r10, 6
385 ; P8LE-NEXT: mulli r9, r9, 95
386 ; P8LE-NEXT: add r10, r10, r11
387 ; P8LE-NEXT: srwi r11, r3, 31
388 ; P8LE-NEXT: srawi r3, r3, 6
389 ; P8LE-NEXT: mulli r10, r10, 95
390 ; P8LE-NEXT: sub r5, r5, r8
391 ; P8LE-NEXT: add r3, r3, r11
392 ; P8LE-NEXT: mtvsrd v2, r5
393 ; P8LE-NEXT: mulli r3, r3, 95
394 ; P8LE-NEXT: sub r6, r6, r9
395 ; P8LE-NEXT: mtvsrd v3, r6
396 ; P8LE-NEXT: sub r5, r7, r10
397 ; P8LE-NEXT: mtvsrd v4, r5
398 ; P8LE-NEXT: sub r3, r4, r3
399 ; P8LE-NEXT: vmrghh v2, v3, v2
400 ; P8LE-NEXT: mtvsrd v5, r3
401 ; P8LE-NEXT: vmrghh v3, v5, v4
402 ; P8LE-NEXT: vmrglw v2, v3, v2
405 ; P8BE-LABEL: fold_srem_vec_2:
407 ; P8BE-NEXT: mfvsrd r4, v2
408 ; P8BE-NEXT: lis r3, -21386
409 ; P8BE-NEXT: ori r3, r3, 37253
410 ; P8BE-NEXT: clrldi r5, r4, 48
411 ; P8BE-NEXT: rldicl r6, r4, 48, 48
412 ; P8BE-NEXT: extsh r5, r5
413 ; P8BE-NEXT: rldicl r7, r4, 32, 48
414 ; P8BE-NEXT: extsh r6, r6
415 ; P8BE-NEXT: mulhw r8, r5, r3
416 ; P8BE-NEXT: rldicl r4, r4, 16, 48
417 ; P8BE-NEXT: extsh r7, r7
418 ; P8BE-NEXT: mulhw r9, r6, r3
419 ; P8BE-NEXT: extsh r4, r4
420 ; P8BE-NEXT: mulhw r10, r7, r3
421 ; P8BE-NEXT: mulhw r3, r4, r3
422 ; P8BE-NEXT: add r8, r8, r5
423 ; P8BE-NEXT: add r9, r9, r6
424 ; P8BE-NEXT: srwi r11, r8, 31
425 ; P8BE-NEXT: srawi r8, r8, 6
426 ; P8BE-NEXT: add r10, r10, r7
427 ; P8BE-NEXT: add r3, r3, r4
428 ; P8BE-NEXT: add r8, r8, r11
429 ; P8BE-NEXT: srwi r11, r9, 31
430 ; P8BE-NEXT: srawi r9, r9, 6
431 ; P8BE-NEXT: mulli r8, r8, 95
432 ; P8BE-NEXT: add r9, r9, r11
433 ; P8BE-NEXT: srwi r11, r10, 31
434 ; P8BE-NEXT: srawi r10, r10, 6
435 ; P8BE-NEXT: mulli r9, r9, 95
436 ; P8BE-NEXT: add r10, r10, r11
437 ; P8BE-NEXT: srwi r11, r3, 31
438 ; P8BE-NEXT: srawi r3, r3, 6
439 ; P8BE-NEXT: mulli r10, r10, 95
440 ; P8BE-NEXT: sub r5, r5, r8
441 ; P8BE-NEXT: addis r8, r2, .LCPI1_0@toc@ha
442 ; P8BE-NEXT: add r3, r3, r11
443 ; P8BE-NEXT: mtvsrwz v2, r5
444 ; P8BE-NEXT: addi r5, r8, .LCPI1_0@toc@l
445 ; P8BE-NEXT: mulli r3, r3, 95
446 ; P8BE-NEXT: sub r6, r6, r9
447 ; P8BE-NEXT: lxvw4x v3, 0, r5
448 ; P8BE-NEXT: mtvsrwz v4, r6
449 ; P8BE-NEXT: sub r5, r7, r10
450 ; P8BE-NEXT: mtvsrwz v5, r5
451 ; P8BE-NEXT: sub r3, r4, r3
452 ; P8BE-NEXT: vperm v2, v4, v2, v3
453 ; P8BE-NEXT: mtvsrwz v0, r3
454 ; P8BE-NEXT: vperm v3, v0, v5, v3
455 ; P8BE-NEXT: vmrghw v2, v3, v2
; IR under test: splat divisor, the same constant in all four lanes.
457 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
462 ; Don't fold if we can combine srem with sdiv.
; The function computes both x srem 95 and x sdiv 95 and adds the two vectors.
; In the assertions below, the per-lane quotient produced by the
; mulhw/add/srwi/srawi/add sequence is kept in a register: it feeds the
; mulli 95 + sub that forms the remainder and is also moved into a vector
; register itself. The two resulting vectors are summed with vadduhm.
463 define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
464 ; P9LE-LABEL: combine_srem_sdiv:
466 ; P9LE-NEXT: li r3, 0
467 ; P9LE-NEXT: lis r4, -21386
468 ; P9LE-NEXT: vextuhrx r3, r3, v2
469 ; P9LE-NEXT: ori r4, r4, 37253
470 ; P9LE-NEXT: extsh r3, r3
471 ; P9LE-NEXT: mulhw r5, r3, r4
472 ; P9LE-NEXT: add r5, r5, r3
473 ; P9LE-NEXT: srwi r6, r5, 31
474 ; P9LE-NEXT: srawi r5, r5, 6
475 ; P9LE-NEXT: add r5, r5, r6
476 ; P9LE-NEXT: mulli r6, r5, 95
477 ; P9LE-NEXT: sub r3, r3, r6
478 ; P9LE-NEXT: mtvsrd v3, r3
479 ; P9LE-NEXT: li r3, 2
480 ; P9LE-NEXT: vextuhrx r3, r3, v2
481 ; P9LE-NEXT: extsh r6, r3
482 ; P9LE-NEXT: mulhw r7, r6, r4
483 ; P9LE-NEXT: add r6, r7, r6
484 ; P9LE-NEXT: srwi r7, r6, 31
485 ; P9LE-NEXT: srawi r6, r6, 6
486 ; P9LE-NEXT: add r6, r6, r7
487 ; P9LE-NEXT: mulli r7, r6, 95
488 ; P9LE-NEXT: sub r3, r3, r7
489 ; P9LE-NEXT: mtvsrd v4, r3
490 ; P9LE-NEXT: li r3, 4
491 ; P9LE-NEXT: vextuhrx r3, r3, v2
492 ; P9LE-NEXT: vmrghh v3, v4, v3
493 ; P9LE-NEXT: extsh r7, r3
494 ; P9LE-NEXT: mulhw r8, r7, r4
495 ; P9LE-NEXT: add r7, r8, r7
496 ; P9LE-NEXT: srwi r8, r7, 31
497 ; P9LE-NEXT: srawi r7, r7, 6
498 ; P9LE-NEXT: add r7, r7, r8
499 ; P9LE-NEXT: mulli r8, r7, 95
500 ; P9LE-NEXT: sub r3, r3, r8
501 ; P9LE-NEXT: mtvsrd v4, r3
502 ; P9LE-NEXT: li r3, 6
503 ; P9LE-NEXT: vextuhrx r3, r3, v2
504 ; P9LE-NEXT: extsh r8, r3
505 ; P9LE-NEXT: mulhw r4, r8, r4
506 ; P9LE-NEXT: add r4, r4, r8
507 ; P9LE-NEXT: srwi r8, r4, 31
508 ; P9LE-NEXT: srawi r4, r4, 6
509 ; P9LE-NEXT: add r4, r4, r8
510 ; P9LE-NEXT: mulli r8, r4, 95
511 ; P9LE-NEXT: mtvsrd v5, r4
512 ; P9LE-NEXT: sub r3, r3, r8
513 ; P9LE-NEXT: mtvsrd v2, r3
514 ; P9LE-NEXT: vmrghh v2, v2, v4
515 ; P9LE-NEXT: mtvsrd v4, r6
516 ; P9LE-NEXT: vmrglw v2, v2, v3
517 ; P9LE-NEXT: mtvsrd v3, r5
518 ; P9LE-NEXT: vmrghh v3, v4, v3
519 ; P9LE-NEXT: mtvsrd v4, r7
520 ; P9LE-NEXT: vmrghh v4, v5, v4
521 ; P9LE-NEXT: vmrglw v3, v4, v3
522 ; P9LE-NEXT: vadduhm v2, v2, v3
525 ; P9BE-LABEL: combine_srem_sdiv:
527 ; P9BE-NEXT: li r3, 6
528 ; P9BE-NEXT: lis r5, -21386
529 ; P9BE-NEXT: vextuhlx r3, r3, v2
530 ; P9BE-NEXT: ori r5, r5, 37253
531 ; P9BE-NEXT: extsh r4, r3
532 ; P9BE-NEXT: mulhw r6, r4, r5
533 ; P9BE-NEXT: add r4, r6, r4
534 ; P9BE-NEXT: srwi r6, r4, 31
535 ; P9BE-NEXT: srawi r4, r4, 6
536 ; P9BE-NEXT: add r4, r4, r6
537 ; P9BE-NEXT: mulli r6, r4, 95
538 ; P9BE-NEXT: sub r3, r3, r6
539 ; P9BE-NEXT: mtvsrwz v3, r3
540 ; P9BE-NEXT: li r3, 4
541 ; P9BE-NEXT: vextuhlx r3, r3, v2
542 ; P9BE-NEXT: extsh r6, r3
543 ; P9BE-NEXT: mulhw r7, r6, r5
544 ; P9BE-NEXT: add r6, r7, r6
545 ; P9BE-NEXT: srwi r7, r6, 31
546 ; P9BE-NEXT: srawi r6, r6, 6
547 ; P9BE-NEXT: add r6, r6, r7
548 ; P9BE-NEXT: mulli r7, r6, 95
549 ; P9BE-NEXT: sub r3, r3, r7
550 ; P9BE-NEXT: mtvsrwz v4, r3
551 ; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
552 ; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l
553 ; P9BE-NEXT: lxv v5, 0(r3)
554 ; P9BE-NEXT: li r3, 2
555 ; P9BE-NEXT: vextuhlx r3, r3, v2
556 ; P9BE-NEXT: extsh r7, r3
557 ; P9BE-NEXT: vperm v3, v4, v3, v5
558 ; P9BE-NEXT: mulhw r8, r7, r5
559 ; P9BE-NEXT: add r7, r8, r7
560 ; P9BE-NEXT: srwi r8, r7, 31
561 ; P9BE-NEXT: srawi r7, r7, 6
562 ; P9BE-NEXT: add r7, r7, r8
563 ; P9BE-NEXT: mulli r8, r7, 95
564 ; P9BE-NEXT: sub r3, r3, r8
565 ; P9BE-NEXT: mtvsrwz v4, r3
566 ; P9BE-NEXT: li r3, 0
567 ; P9BE-NEXT: vextuhlx r3, r3, v2
568 ; P9BE-NEXT: extsh r3, r3
569 ; P9BE-NEXT: mulhw r5, r3, r5
570 ; P9BE-NEXT: add r5, r5, r3
571 ; P9BE-NEXT: srwi r8, r5, 31
572 ; P9BE-NEXT: srawi r5, r5, 6
573 ; P9BE-NEXT: add r5, r5, r8
574 ; P9BE-NEXT: mulli r8, r5, 95
575 ; P9BE-NEXT: mtvsrwz v0, r5
576 ; P9BE-NEXT: sub r3, r3, r8
577 ; P9BE-NEXT: mtvsrwz v2, r3
578 ; P9BE-NEXT: vperm v2, v2, v4, v5
579 ; P9BE-NEXT: mtvsrwz v4, r6
580 ; P9BE-NEXT: vmrghw v2, v2, v3
581 ; P9BE-NEXT: mtvsrwz v3, r4
582 ; P9BE-NEXT: vperm v3, v4, v3, v5
583 ; P9BE-NEXT: mtvsrwz v4, r7
584 ; P9BE-NEXT: vperm v4, v0, v4, v5
585 ; P9BE-NEXT: vmrghw v3, v4, v3
586 ; P9BE-NEXT: vadduhm v2, v2, v3
589 ; P8LE-LABEL: combine_srem_sdiv:
591 ; P8LE-NEXT: xxswapd vs0, v2
592 ; P8LE-NEXT: lis r3, -21386
593 ; P8LE-NEXT: ori r3, r3, 37253
594 ; P8LE-NEXT: mffprd r4, f0
595 ; P8LE-NEXT: clrldi r5, r4, 48
596 ; P8LE-NEXT: rldicl r6, r4, 48, 48
597 ; P8LE-NEXT: rldicl r7, r4, 32, 48
598 ; P8LE-NEXT: extsh r5, r5
599 ; P8LE-NEXT: extsh r8, r6
600 ; P8LE-NEXT: extsh r9, r7
601 ; P8LE-NEXT: mulhw r10, r5, r3
602 ; P8LE-NEXT: mulhw r11, r8, r3
603 ; P8LE-NEXT: rldicl r4, r4, 16, 48
604 ; P8LE-NEXT: mulhw r12, r9, r3
605 ; P8LE-NEXT: extsh r0, r4
606 ; P8LE-NEXT: mulhw r3, r0, r3
607 ; P8LE-NEXT: add r10, r10, r5
608 ; P8LE-NEXT: add r8, r11, r8
609 ; P8LE-NEXT: srwi r11, r10, 31
610 ; P8LE-NEXT: add r9, r12, r9
611 ; P8LE-NEXT: srawi r10, r10, 6
612 ; P8LE-NEXT: srawi r12, r8, 6
613 ; P8LE-NEXT: srwi r8, r8, 31
614 ; P8LE-NEXT: add r10, r10, r11
615 ; P8LE-NEXT: add r3, r3, r0
616 ; P8LE-NEXT: srawi r11, r9, 6
617 ; P8LE-NEXT: srwi r9, r9, 31
618 ; P8LE-NEXT: add r8, r12, r8
619 ; P8LE-NEXT: mtvsrd v2, r10
620 ; P8LE-NEXT: mulli r12, r10, 95
621 ; P8LE-NEXT: add r9, r11, r9
622 ; P8LE-NEXT: srwi r11, r3, 31
623 ; P8LE-NEXT: mtvsrd v3, r8
624 ; P8LE-NEXT: srawi r3, r3, 6
625 ; P8LE-NEXT: mulli r10, r8, 95
626 ; P8LE-NEXT: mtvsrd v4, r9
627 ; P8LE-NEXT: add r3, r3, r11
628 ; P8LE-NEXT: mulli r8, r9, 95
629 ; P8LE-NEXT: vmrghh v2, v3, v2
630 ; P8LE-NEXT: mulli r9, r3, 95
631 ; P8LE-NEXT: sub r5, r5, r12
632 ; P8LE-NEXT: sub r6, r6, r10
633 ; P8LE-NEXT: mtvsrd v3, r5
634 ; P8LE-NEXT: mtvsrd v5, r6
635 ; P8LE-NEXT: sub r5, r7, r8
636 ; P8LE-NEXT: sub r4, r4, r9
637 ; P8LE-NEXT: mtvsrd v0, r5
638 ; P8LE-NEXT: mtvsrd v1, r4
639 ; P8LE-NEXT: vmrghh v3, v5, v3
640 ; P8LE-NEXT: mtvsrd v5, r3
641 ; P8LE-NEXT: vmrghh v0, v1, v0
642 ; P8LE-NEXT: vmrghh v4, v5, v4
643 ; P8LE-NEXT: vmrglw v3, v0, v3
644 ; P8LE-NEXT: vmrglw v2, v4, v2
645 ; P8LE-NEXT: vadduhm v2, v3, v2
648 ; P8BE-LABEL: combine_srem_sdiv:
650 ; P8BE-NEXT: mfvsrd r4, v2
651 ; P8BE-NEXT: lis r3, -21386
652 ; P8BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
653 ; P8BE-NEXT: addis r30, r2, .LCPI2_0@toc@ha
654 ; P8BE-NEXT: ori r3, r3, 37253
655 ; P8BE-NEXT: clrldi r5, r4, 48
656 ; P8BE-NEXT: rldicl r6, r4, 48, 48
657 ; P8BE-NEXT: rldicl r7, r4, 32, 48
658 ; P8BE-NEXT: extsh r8, r5
659 ; P8BE-NEXT: extsh r9, r6
660 ; P8BE-NEXT: extsh r10, r7
661 ; P8BE-NEXT: mulhw r11, r8, r3
662 ; P8BE-NEXT: mulhw r12, r9, r3
663 ; P8BE-NEXT: rldicl r4, r4, 16, 48
664 ; P8BE-NEXT: mulhw r0, r10, r3
665 ; P8BE-NEXT: extsh r4, r4
666 ; P8BE-NEXT: mulhw r3, r4, r3
667 ; P8BE-NEXT: add r8, r11, r8
668 ; P8BE-NEXT: add r9, r12, r9
669 ; P8BE-NEXT: srwi r11, r8, 31
670 ; P8BE-NEXT: add r10, r0, r10
671 ; P8BE-NEXT: srawi r8, r8, 6
672 ; P8BE-NEXT: addi r0, r30, .LCPI2_0@toc@l
673 ; P8BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
674 ; P8BE-NEXT: srawi r12, r9, 6
675 ; P8BE-NEXT: srwi r9, r9, 31
676 ; P8BE-NEXT: add r8, r8, r11
677 ; P8BE-NEXT: add r3, r3, r4
678 ; P8BE-NEXT: lxvw4x v2, 0, r0
679 ; P8BE-NEXT: srawi r11, r10, 6
680 ; P8BE-NEXT: srwi r10, r10, 31
681 ; P8BE-NEXT: add r9, r12, r9
682 ; P8BE-NEXT: mtvsrwz v3, r8
683 ; P8BE-NEXT: mulli r12, r8, 95
684 ; P8BE-NEXT: add r10, r11, r10
685 ; P8BE-NEXT: srwi r11, r3, 31
686 ; P8BE-NEXT: mtvsrwz v4, r9
687 ; P8BE-NEXT: srawi r3, r3, 6
688 ; P8BE-NEXT: mulli r8, r9, 95
689 ; P8BE-NEXT: mtvsrwz v5, r10
690 ; P8BE-NEXT: add r3, r3, r11
691 ; P8BE-NEXT: mulli r9, r10, 95
692 ; P8BE-NEXT: vperm v3, v4, v3, v2
693 ; P8BE-NEXT: mulli r10, r3, 95
694 ; P8BE-NEXT: sub r5, r5, r12
695 ; P8BE-NEXT: sub r6, r6, r8
696 ; P8BE-NEXT: mtvsrwz v4, r5
697 ; P8BE-NEXT: mtvsrwz v0, r6
698 ; P8BE-NEXT: sub r5, r7, r9
699 ; P8BE-NEXT: sub r4, r4, r10
700 ; P8BE-NEXT: mtvsrwz v1, r5
701 ; P8BE-NEXT: mtvsrwz v6, r4
702 ; P8BE-NEXT: vperm v4, v0, v4, v2
703 ; P8BE-NEXT: mtvsrwz v0, r3
704 ; P8BE-NEXT: vperm v1, v6, v1, v2
705 ; P8BE-NEXT: vperm v2, v0, v5, v2
706 ; P8BE-NEXT: vmrghw v4, v1, v4
707 ; P8BE-NEXT: vmrghw v2, v2, v3
708 ; P8BE-NEXT: vadduhm v2, v4, v2
; IR under test: remainder and quotient by the same splat divisor, then summed
; so that both results are used.
710 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
711 %2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
712 %3 = add <4 x i16> %1, %2
716 ; Don't fold for divisors that are a power of two.
; Signed remainder by a vector that mixes power-of-two divisors (64, 32, 8)
; with one divisor that is not a power of two (95).
; In the assertions below, the power-of-two lanes use a
; srawi/addze/slwi/sub sequence with the matching shift amount (6, 5, 3),
; while the lane divided by 95 uses the mulhw/add/srwi/srawi/add/mulli/sub
; sequence.
717 define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
718 ; P9LE-LABEL: dont_fold_srem_power_of_two:
720 ; P9LE-NEXT: li r3, 0
721 ; P9LE-NEXT: vextuhrx r3, r3, v2
722 ; P9LE-NEXT: extsh r3, r3
723 ; P9LE-NEXT: srawi r4, r3, 6
724 ; P9LE-NEXT: addze r4, r4
725 ; P9LE-NEXT: slwi r4, r4, 6
726 ; P9LE-NEXT: sub r3, r3, r4
727 ; P9LE-NEXT: mtvsrd v3, r3
728 ; P9LE-NEXT: li r3, 2
729 ; P9LE-NEXT: vextuhrx r3, r3, v2
730 ; P9LE-NEXT: extsh r3, r3
731 ; P9LE-NEXT: srawi r4, r3, 5
732 ; P9LE-NEXT: addze r4, r4
733 ; P9LE-NEXT: slwi r4, r4, 5
734 ; P9LE-NEXT: sub r3, r3, r4
735 ; P9LE-NEXT: lis r4, -21386
736 ; P9LE-NEXT: mtvsrd v4, r3
737 ; P9LE-NEXT: li r3, 6
738 ; P9LE-NEXT: ori r4, r4, 37253
739 ; P9LE-NEXT: vextuhrx r3, r3, v2
740 ; P9LE-NEXT: vmrghh v3, v4, v3
741 ; P9LE-NEXT: extsh r3, r3
742 ; P9LE-NEXT: mulhw r4, r3, r4
743 ; P9LE-NEXT: add r4, r4, r3
744 ; P9LE-NEXT: srwi r5, r4, 31
745 ; P9LE-NEXT: srawi r4, r4, 6
746 ; P9LE-NEXT: add r4, r4, r5
747 ; P9LE-NEXT: mulli r4, r4, 95
748 ; P9LE-NEXT: sub r3, r3, r4
749 ; P9LE-NEXT: mtvsrd v4, r3
750 ; P9LE-NEXT: li r3, 4
751 ; P9LE-NEXT: vextuhrx r3, r3, v2
752 ; P9LE-NEXT: extsh r3, r3
753 ; P9LE-NEXT: srawi r4, r3, 3
754 ; P9LE-NEXT: addze r4, r4
755 ; P9LE-NEXT: slwi r4, r4, 3
756 ; P9LE-NEXT: sub r3, r3, r4
757 ; P9LE-NEXT: mtvsrd v2, r3
758 ; P9LE-NEXT: vmrghh v2, v4, v2
759 ; P9LE-NEXT: vmrglw v2, v2, v3
762 ; P9BE-LABEL: dont_fold_srem_power_of_two:
764 ; P9BE-NEXT: li r3, 2
765 ; P9BE-NEXT: vextuhlx r3, r3, v2
766 ; P9BE-NEXT: extsh r3, r3
767 ; P9BE-NEXT: srawi r4, r3, 5
768 ; P9BE-NEXT: addze r4, r4
769 ; P9BE-NEXT: slwi r4, r4, 5
770 ; P9BE-NEXT: sub r3, r3, r4
771 ; P9BE-NEXT: mtvsrwz v3, r3
772 ; P9BE-NEXT: li r3, 0
773 ; P9BE-NEXT: vextuhlx r3, r3, v2
774 ; P9BE-NEXT: extsh r3, r3
775 ; P9BE-NEXT: srawi r4, r3, 6
776 ; P9BE-NEXT: addze r4, r4
777 ; P9BE-NEXT: slwi r4, r4, 6
778 ; P9BE-NEXT: sub r3, r3, r4
779 ; P9BE-NEXT: lis r4, -21386
780 ; P9BE-NEXT: mtvsrwz v4, r3
781 ; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha
782 ; P9BE-NEXT: ori r4, r4, 37253
783 ; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l
784 ; P9BE-NEXT: lxv v5, 0(r3)
785 ; P9BE-NEXT: li r3, 6
786 ; P9BE-NEXT: vextuhlx r3, r3, v2
787 ; P9BE-NEXT: extsh r3, r3
788 ; P9BE-NEXT: vperm v3, v4, v3, v5
789 ; P9BE-NEXT: mulhw r4, r3, r4
790 ; P9BE-NEXT: add r4, r4, r3
791 ; P9BE-NEXT: srwi r5, r4, 31
792 ; P9BE-NEXT: srawi r4, r4, 6
793 ; P9BE-NEXT: add r4, r4, r5
794 ; P9BE-NEXT: mulli r4, r4, 95
795 ; P9BE-NEXT: sub r3, r3, r4
796 ; P9BE-NEXT: mtvsrwz v4, r3
797 ; P9BE-NEXT: li r3, 4
798 ; P9BE-NEXT: vextuhlx r3, r3, v2
799 ; P9BE-NEXT: extsh r3, r3
800 ; P9BE-NEXT: srawi r4, r3, 3
801 ; P9BE-NEXT: addze r4, r4
802 ; P9BE-NEXT: slwi r4, r4, 3
803 ; P9BE-NEXT: sub r3, r3, r4
804 ; P9BE-NEXT: mtvsrwz v2, r3
805 ; P9BE-NEXT: vperm v2, v2, v4, v5
806 ; P9BE-NEXT: vmrghw v2, v3, v2
809 ; P8LE-LABEL: dont_fold_srem_power_of_two:
811 ; P8LE-NEXT: xxswapd vs0, v2
812 ; P8LE-NEXT: lis r3, -21386
813 ; P8LE-NEXT: ori r3, r3, 37253
814 ; P8LE-NEXT: mffprd r4, f0
815 ; P8LE-NEXT: rldicl r5, r4, 16, 48
816 ; P8LE-NEXT: clrldi r6, r4, 48
817 ; P8LE-NEXT: extsh r5, r5
818 ; P8LE-NEXT: extsh r6, r6
819 ; P8LE-NEXT: mulhw r3, r5, r3
820 ; P8LE-NEXT: rldicl r7, r4, 48, 48
821 ; P8LE-NEXT: srawi r8, r6, 6
822 ; P8LE-NEXT: extsh r7, r7
823 ; P8LE-NEXT: addze r8, r8
824 ; P8LE-NEXT: rldicl r4, r4, 32, 48
825 ; P8LE-NEXT: srawi r9, r7, 5
826 ; P8LE-NEXT: extsh r4, r4
827 ; P8LE-NEXT: slwi r8, r8, 6
828 ; P8LE-NEXT: add r3, r3, r5
829 ; P8LE-NEXT: addze r9, r9
830 ; P8LE-NEXT: sub r6, r6, r8
831 ; P8LE-NEXT: srwi r10, r3, 31
832 ; P8LE-NEXT: srawi r3, r3, 6
833 ; P8LE-NEXT: slwi r8, r9, 5
834 ; P8LE-NEXT: mtvsrd v2, r6
835 ; P8LE-NEXT: add r3, r3, r10
836 ; P8LE-NEXT: srawi r9, r4, 3
837 ; P8LE-NEXT: sub r6, r7, r8
838 ; P8LE-NEXT: mulli r3, r3, 95
839 ; P8LE-NEXT: addze r7, r9
840 ; P8LE-NEXT: mtvsrd v3, r6
841 ; P8LE-NEXT: vmrghh v2, v3, v2
842 ; P8LE-NEXT: sub r3, r5, r3
843 ; P8LE-NEXT: slwi r5, r7, 3
844 ; P8LE-NEXT: sub r4, r4, r5
845 ; P8LE-NEXT: mtvsrd v4, r3
846 ; P8LE-NEXT: mtvsrd v5, r4
847 ; P8LE-NEXT: vmrghh v3, v4, v5
848 ; P8LE-NEXT: vmrglw v2, v3, v2
851 ; P8BE-LABEL: dont_fold_srem_power_of_two:
853 ; P8BE-NEXT: mfvsrd r4, v2
854 ; P8BE-NEXT: lis r3, -21386
855 ; P8BE-NEXT: ori r3, r3, 37253
856 ; P8BE-NEXT: clrldi r5, r4, 48
857 ; P8BE-NEXT: rldicl r6, r4, 32, 48
858 ; P8BE-NEXT: extsh r5, r5
859 ; P8BE-NEXT: extsh r6, r6
860 ; P8BE-NEXT: mulhw r3, r5, r3
861 ; P8BE-NEXT: rldicl r7, r4, 16, 48
862 ; P8BE-NEXT: srawi r8, r6, 5
863 ; P8BE-NEXT: extsh r7, r7
864 ; P8BE-NEXT: addze r8, r8
865 ; P8BE-NEXT: rldicl r4, r4, 48, 48
866 ; P8BE-NEXT: srawi r9, r7, 6
867 ; P8BE-NEXT: extsh r4, r4
868 ; P8BE-NEXT: slwi r8, r8, 5
869 ; P8BE-NEXT: add r3, r3, r5
870 ; P8BE-NEXT: addze r9, r9
871 ; P8BE-NEXT: sub r6, r6, r8
872 ; P8BE-NEXT: srwi r10, r3, 31
873 ; P8BE-NEXT: srawi r3, r3, 6
874 ; P8BE-NEXT: slwi r8, r9, 6
875 ; P8BE-NEXT: mtvsrwz v2, r6
876 ; P8BE-NEXT: add r3, r3, r10
877 ; P8BE-NEXT: srawi r9, r4, 3
878 ; P8BE-NEXT: addis r10, r2, .LCPI3_0@toc@ha
879 ; P8BE-NEXT: sub r6, r7, r8
880 ; P8BE-NEXT: mulli r3, r3, 95
881 ; P8BE-NEXT: addze r8, r9
882 ; P8BE-NEXT: addi r7, r10, .LCPI3_0@toc@l
883 ; P8BE-NEXT: mtvsrwz v4, r6
884 ; P8BE-NEXT: lxvw4x v3, 0, r7
885 ; P8BE-NEXT: sub r3, r5, r3
886 ; P8BE-NEXT: slwi r5, r8, 3
887 ; P8BE-NEXT: vperm v2, v4, v2, v3
888 ; P8BE-NEXT: sub r4, r4, r5
889 ; P8BE-NEXT: mtvsrwz v5, r3
890 ; P8BE-NEXT: mtvsrwz v0, r4
891 ; P8BE-NEXT: vperm v3, v0, v5, v3
892 ; P8BE-NEXT: vmrghw v2, v2, v3
; IR under test: lanes 0-2 divide by powers of two, lane 3 divides by 95.
894 %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
898 ; Don't fold if the divisor is one.
899 define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
900 ; P9LE-LABEL: dont_fold_srem_one:
902 ; P9LE-NEXT: li r3, 2
903 ; P9LE-NEXT: lis r4, -14230
904 ; P9LE-NEXT: vextuhrx r3, r3, v2
905 ; P9LE-NEXT: ori r4, r4, 30865
906 ; P9LE-NEXT: extsh r3, r3
907 ; P9LE-NEXT: mulhw r4, r3, r4
908 ; P9LE-NEXT: add r4, r4, r3
909 ; P9LE-NEXT: srwi r5, r4, 31
910 ; P9LE-NEXT: srawi r4, r4, 9
911 ; P9LE-NEXT: add r4, r4, r5
912 ; P9LE-NEXT: mulli r4, r4, 654
913 ; P9LE-NEXT: sub r3, r3, r4
914 ; P9LE-NEXT: lis r4, -19946
915 ; P9LE-NEXT: mtvsrd v3, r3
916 ; P9LE-NEXT: li r3, 0
917 ; P9LE-NEXT: ori r4, r4, 17097
918 ; P9LE-NEXT: mtvsrd v4, r3
919 ; P9LE-NEXT: li r3, 4
920 ; P9LE-NEXT: vextuhrx r3, r3, v2
921 ; P9LE-NEXT: vmrghh v3, v3, v4
922 ; P9LE-NEXT: extsh r3, r3
923 ; P9LE-NEXT: mulhw r4, r3, r4
924 ; P9LE-NEXT: add r4, r4, r3
925 ; P9LE-NEXT: srwi r5, r4, 31
926 ; P9LE-NEXT: srawi r4, r4, 4
927 ; P9LE-NEXT: add r4, r4, r5
928 ; P9LE-NEXT: mulli r4, r4, 23
929 ; P9LE-NEXT: sub r3, r3, r4
930 ; P9LE-NEXT: lis r4, 24749
931 ; P9LE-NEXT: mtvsrd v4, r3
932 ; P9LE-NEXT: li r3, 6
933 ; P9LE-NEXT: ori r4, r4, 47143
934 ; P9LE-NEXT: vextuhrx r3, r3, v2
935 ; P9LE-NEXT: extsh r3, r3
936 ; P9LE-NEXT: mulhw r4, r3, r4
937 ; P9LE-NEXT: srwi r5, r4, 31
938 ; P9LE-NEXT: srawi r4, r4, 11
939 ; P9LE-NEXT: add r4, r4, r5
940 ; P9LE-NEXT: mulli r4, r4, 5423
941 ; P9LE-NEXT: sub r3, r3, r4
942 ; P9LE-NEXT: mtvsrd v2, r3
943 ; P9LE-NEXT: vmrghh v2, v2, v4
944 ; P9LE-NEXT: vmrglw v2, v2, v3
947 ; P9BE-LABEL: dont_fold_srem_one:
949 ; P9BE-NEXT: li r3, 4
950 ; P9BE-NEXT: lis r4, -19946
951 ; P9BE-NEXT: vextuhlx r3, r3, v2
952 ; P9BE-NEXT: ori r4, r4, 17097
953 ; P9BE-NEXT: extsh r3, r3
954 ; P9BE-NEXT: mulhw r4, r3, r4
955 ; P9BE-NEXT: add r4, r4, r3
956 ; P9BE-NEXT: srwi r5, r4, 31
957 ; P9BE-NEXT: srawi r4, r4, 4
958 ; P9BE-NEXT: add r4, r4, r5
959 ; P9BE-NEXT: mulli r4, r4, 23
960 ; P9BE-NEXT: sub r3, r3, r4
961 ; P9BE-NEXT: lis r4, 24749
962 ; P9BE-NEXT: mtvsrwz v3, r3
963 ; P9BE-NEXT: li r3, 6
964 ; P9BE-NEXT: ori r4, r4, 47143
965 ; P9BE-NEXT: vextuhlx r3, r3, v2
966 ; P9BE-NEXT: extsh r3, r3
967 ; P9BE-NEXT: mulhw r4, r3, r4
968 ; P9BE-NEXT: srwi r5, r4, 31
969 ; P9BE-NEXT: srawi r4, r4, 11
970 ; P9BE-NEXT: add r4, r4, r5
971 ; P9BE-NEXT: mulli r4, r4, 5423
972 ; P9BE-NEXT: sub r3, r3, r4
973 ; P9BE-NEXT: lis r4, -14230
974 ; P9BE-NEXT: mtvsrwz v4, r3
975 ; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
976 ; P9BE-NEXT: ori r4, r4, 30865
977 ; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l
978 ; P9BE-NEXT: lxv v5, 0(r3)
979 ; P9BE-NEXT: li r3, 2
980 ; P9BE-NEXT: vextuhlx r3, r3, v2
981 ; P9BE-NEXT: extsh r3, r3
982 ; P9BE-NEXT: vperm v3, v3, v4, v5
983 ; P9BE-NEXT: mulhw r4, r3, r4
984 ; P9BE-NEXT: add r4, r4, r3
985 ; P9BE-NEXT: srwi r5, r4, 31
986 ; P9BE-NEXT: srawi r4, r4, 9
987 ; P9BE-NEXT: add r4, r4, r5
988 ; P9BE-NEXT: mulli r4, r4, 654
989 ; P9BE-NEXT: sub r3, r3, r4
990 ; P9BE-NEXT: mtvsrwz v2, r3
991 ; P9BE-NEXT: li r3, 0
992 ; P9BE-NEXT: mtvsrwz v4, r3
993 ; P9BE-NEXT: vperm v2, v4, v2, v5
994 ; P9BE-NEXT: vmrghw v2, v2, v3
997 ; P8LE-LABEL: dont_fold_srem_one:
999 ; P8LE-NEXT: xxswapd vs0, v2
1000 ; P8LE-NEXT: lis r5, 24749
1001 ; P8LE-NEXT: lis r6, -19946
1002 ; P8LE-NEXT: lis r8, -14230
1003 ; P8LE-NEXT: ori r5, r5, 47143
1004 ; P8LE-NEXT: ori r6, r6, 17097
1005 ; P8LE-NEXT: ori r8, r8, 30865
1006 ; P8LE-NEXT: mffprd r3, f0
1007 ; P8LE-NEXT: rldicl r4, r3, 16, 48
1008 ; P8LE-NEXT: rldicl r7, r3, 32, 48
1009 ; P8LE-NEXT: rldicl r3, r3, 48, 48
1010 ; P8LE-NEXT: extsh r4, r4
1011 ; P8LE-NEXT: extsh r7, r7
1012 ; P8LE-NEXT: extsh r3, r3
1013 ; P8LE-NEXT: mulhw r5, r4, r5
1014 ; P8LE-NEXT: mulhw r6, r7, r6
1015 ; P8LE-NEXT: mulhw r8, r3, r8
1016 ; P8LE-NEXT: srwi r9, r5, 31
1017 ; P8LE-NEXT: srawi r5, r5, 11
1018 ; P8LE-NEXT: add r6, r6, r7
1019 ; P8LE-NEXT: add r8, r8, r3
1020 ; P8LE-NEXT: add r5, r5, r9
1021 ; P8LE-NEXT: srwi r9, r6, 31
1022 ; P8LE-NEXT: srawi r6, r6, 4
1023 ; P8LE-NEXT: add r6, r6, r9
1024 ; P8LE-NEXT: srwi r9, r8, 31
1025 ; P8LE-NEXT: srawi r8, r8, 9
1026 ; P8LE-NEXT: mulli r5, r5, 5423
1027 ; P8LE-NEXT: add r8, r8, r9
1028 ; P8LE-NEXT: mulli r6, r6, 23
1029 ; P8LE-NEXT: li r9, 0
1030 ; P8LE-NEXT: mulli r8, r8, 654
1031 ; P8LE-NEXT: mtvsrd v2, r9
1032 ; P8LE-NEXT: sub r4, r4, r5
1033 ; P8LE-NEXT: sub r5, r7, r6
1034 ; P8LE-NEXT: mtvsrd v3, r4
1035 ; P8LE-NEXT: sub r3, r3, r8
1036 ; P8LE-NEXT: mtvsrd v4, r5
1037 ; P8LE-NEXT: mtvsrd v5, r3
1038 ; P8LE-NEXT: vmrghh v3, v3, v4
1039 ; P8LE-NEXT: vmrghh v2, v5, v2
1040 ; P8LE-NEXT: vmrglw v2, v3, v2
1043 ; P8BE-LABEL: dont_fold_srem_one:
1045 ; P8BE-NEXT: mfvsrd r4, v2
1046 ; P8BE-NEXT: lis r3, 24749
1047 ; P8BE-NEXT: lis r7, -19946
1048 ; P8BE-NEXT: lis r8, -14230
1049 ; P8BE-NEXT: ori r3, r3, 47143
1050 ; P8BE-NEXT: ori r7, r7, 17097
1051 ; P8BE-NEXT: ori r8, r8, 30865
1052 ; P8BE-NEXT: clrldi r5, r4, 48
1053 ; P8BE-NEXT: rldicl r6, r4, 48, 48
1054 ; P8BE-NEXT: rldicl r4, r4, 32, 48
1055 ; P8BE-NEXT: extsh r5, r5
1056 ; P8BE-NEXT: extsh r6, r6
1057 ; P8BE-NEXT: extsh r4, r4
1058 ; P8BE-NEXT: mulhw r3, r5, r3
1059 ; P8BE-NEXT: mulhw r7, r6, r7
1060 ; P8BE-NEXT: mulhw r8, r4, r8
1061 ; P8BE-NEXT: srawi r9, r3, 11
1062 ; P8BE-NEXT: srwi r3, r3, 31
1063 ; P8BE-NEXT: add r7, r7, r6
1064 ; P8BE-NEXT: add r8, r8, r4
1065 ; P8BE-NEXT: add r3, r9, r3
1066 ; P8BE-NEXT: srwi r9, r7, 31
1067 ; P8BE-NEXT: srawi r7, r7, 4
1068 ; P8BE-NEXT: srawi r10, r8, 9
1069 ; P8BE-NEXT: srwi r8, r8, 31
1070 ; P8BE-NEXT: add r7, r7, r9
1071 ; P8BE-NEXT: addis r9, r2, .LCPI4_0@toc@ha
1072 ; P8BE-NEXT: mulli r3, r3, 5423
1073 ; P8BE-NEXT: add r8, r10, r8
1074 ; P8BE-NEXT: li r10, 0
1075 ; P8BE-NEXT: mulli r7, r7, 23
1076 ; P8BE-NEXT: mulli r8, r8, 654
1077 ; P8BE-NEXT: mtvsrwz v2, r10
1078 ; P8BE-NEXT: sub r3, r5, r3
1079 ; P8BE-NEXT: addi r5, r9, .LCPI4_0@toc@l
1080 ; P8BE-NEXT: lxvw4x v3, 0, r5
1081 ; P8BE-NEXT: sub r5, r6, r7
1082 ; P8BE-NEXT: mtvsrwz v4, r3
1083 ; P8BE-NEXT: sub r3, r4, r8
1084 ; P8BE-NEXT: mtvsrwz v5, r5
1085 ; P8BE-NEXT: mtvsrwz v0, r3
1086 ; P8BE-NEXT: vperm v4, v5, v4, v3
1087 ; P8BE-NEXT: vperm v2, v2, v0, v3
1088 ; P8BE-NEXT: vmrghw v2, v2, v4
1090 %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
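1094 ; Don't fold if the divisor is 2^15 (written as i16 32768, i.e. -32768 when read as a signed i16). NOTE: despite "urem" and "smax" in its name, the test below performs srem.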
1095 define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
1096 ; P9LE-LABEL: dont_fold_urem_i16_smax:
1098 ; P9LE-NEXT: li r3, 4
1099 ; P9LE-NEXT: lis r4, -19946
1100 ; P9LE-NEXT: vextuhrx r3, r3, v2
1101 ; P9LE-NEXT: ori r4, r4, 17097
1102 ; P9LE-NEXT: extsh r3, r3
1103 ; P9LE-NEXT: mulhw r4, r3, r4
1104 ; P9LE-NEXT: add r4, r4, r3
1105 ; P9LE-NEXT: srwi r5, r4, 31
1106 ; P9LE-NEXT: srawi r4, r4, 4
1107 ; P9LE-NEXT: add r4, r4, r5
1108 ; P9LE-NEXT: mulli r4, r4, 23
1109 ; P9LE-NEXT: sub r3, r3, r4
1110 ; P9LE-NEXT: lis r4, 24749
1111 ; P9LE-NEXT: mtvsrd v3, r3
1112 ; P9LE-NEXT: li r3, 6
1113 ; P9LE-NEXT: ori r4, r4, 47143
1114 ; P9LE-NEXT: vextuhrx r3, r3, v2
1115 ; P9LE-NEXT: extsh r3, r3
1116 ; P9LE-NEXT: mulhw r4, r3, r4
1117 ; P9LE-NEXT: srwi r5, r4, 31
1118 ; P9LE-NEXT: srawi r4, r4, 11
1119 ; P9LE-NEXT: add r4, r4, r5
1120 ; P9LE-NEXT: mulli r4, r4, 5423
1121 ; P9LE-NEXT: sub r3, r3, r4
1122 ; P9LE-NEXT: mtvsrd v4, r3
1123 ; P9LE-NEXT: li r3, 2
1124 ; P9LE-NEXT: vextuhrx r3, r3, v2
1125 ; P9LE-NEXT: vmrghh v3, v4, v3
1126 ; P9LE-NEXT: extsh r3, r3
1127 ; P9LE-NEXT: srawi r4, r3, 15
1128 ; P9LE-NEXT: addze r4, r4
1129 ; P9LE-NEXT: slwi r4, r4, 15
1130 ; P9LE-NEXT: sub r3, r3, r4
1131 ; P9LE-NEXT: mtvsrd v2, r3
1132 ; P9LE-NEXT: li r3, 0
1133 ; P9LE-NEXT: mtvsrd v4, r3
1134 ; P9LE-NEXT: vmrghh v2, v2, v4
1135 ; P9LE-NEXT: vmrglw v2, v3, v2
1138 ; P9BE-LABEL: dont_fold_urem_i16_smax:
1140 ; P9BE-NEXT: li r3, 4
1141 ; P9BE-NEXT: lis r4, -19946
1142 ; P9BE-NEXT: vextuhlx r3, r3, v2
1143 ; P9BE-NEXT: ori r4, r4, 17097
1144 ; P9BE-NEXT: extsh r3, r3
1145 ; P9BE-NEXT: mulhw r4, r3, r4
1146 ; P9BE-NEXT: add r4, r4, r3
1147 ; P9BE-NEXT: srwi r5, r4, 31
1148 ; P9BE-NEXT: srawi r4, r4, 4
1149 ; P9BE-NEXT: add r4, r4, r5
1150 ; P9BE-NEXT: mulli r4, r4, 23
1151 ; P9BE-NEXT: sub r3, r3, r4
1152 ; P9BE-NEXT: lis r4, 24749
1153 ; P9BE-NEXT: mtvsrwz v3, r3
1154 ; P9BE-NEXT: li r3, 6
1155 ; P9BE-NEXT: ori r4, r4, 47143
1156 ; P9BE-NEXT: vextuhlx r3, r3, v2
1157 ; P9BE-NEXT: extsh r3, r3
1158 ; P9BE-NEXT: mulhw r4, r3, r4
1159 ; P9BE-NEXT: srwi r5, r4, 31
1160 ; P9BE-NEXT: srawi r4, r4, 11
1161 ; P9BE-NEXT: add r4, r4, r5
1162 ; P9BE-NEXT: mulli r4, r4, 5423
1163 ; P9BE-NEXT: sub r3, r3, r4
1164 ; P9BE-NEXT: mtvsrwz v4, r3
1165 ; P9BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha
1166 ; P9BE-NEXT: addi r3, r3, .LCPI5_0@toc@l
1167 ; P9BE-NEXT: lxv v5, 0(r3)
1168 ; P9BE-NEXT: li r3, 2
1169 ; P9BE-NEXT: vextuhlx r3, r3, v2
1170 ; P9BE-NEXT: extsh r3, r3
1171 ; P9BE-NEXT: vperm v3, v3, v4, v5
1172 ; P9BE-NEXT: srawi r4, r3, 15
1173 ; P9BE-NEXT: addze r4, r4
1174 ; P9BE-NEXT: slwi r4, r4, 15
1175 ; P9BE-NEXT: sub r3, r3, r4
1176 ; P9BE-NEXT: mtvsrwz v2, r3
1177 ; P9BE-NEXT: li r3, 0
1178 ; P9BE-NEXT: mtvsrwz v4, r3
1179 ; P9BE-NEXT: vperm v2, v4, v2, v5
1180 ; P9BE-NEXT: vmrghw v2, v2, v3
1183 ; P8LE-LABEL: dont_fold_urem_i16_smax:
1185 ; P8LE-NEXT: xxswapd vs0, v2
1186 ; P8LE-NEXT: lis r4, 24749
1187 ; P8LE-NEXT: lis r5, -19946
1188 ; P8LE-NEXT: ori r4, r4, 47143
1189 ; P8LE-NEXT: ori r5, r5, 17097
1190 ; P8LE-NEXT: mffprd r3, f0
1191 ; P8LE-NEXT: rldicl r6, r3, 16, 48
1192 ; P8LE-NEXT: rldicl r7, r3, 32, 48
1193 ; P8LE-NEXT: extsh r6, r6
1194 ; P8LE-NEXT: extsh r7, r7
1195 ; P8LE-NEXT: mulhw r4, r6, r4
1196 ; P8LE-NEXT: mulhw r5, r7, r5
1197 ; P8LE-NEXT: rldicl r3, r3, 48, 48
1198 ; P8LE-NEXT: extsh r3, r3
1199 ; P8LE-NEXT: srwi r8, r4, 31
1200 ; P8LE-NEXT: srawi r4, r4, 11
1201 ; P8LE-NEXT: add r5, r5, r7
1202 ; P8LE-NEXT: add r4, r4, r8
1203 ; P8LE-NEXT: srwi r8, r5, 31
1204 ; P8LE-NEXT: srawi r5, r5, 4
1205 ; P8LE-NEXT: mulli r4, r4, 5423
1206 ; P8LE-NEXT: add r5, r5, r8
1207 ; P8LE-NEXT: srawi r9, r3, 15
1208 ; P8LE-NEXT: li r8, 0
1209 ; P8LE-NEXT: mulli r5, r5, 23
1210 ; P8LE-NEXT: mtvsrd v2, r8
1211 ; P8LE-NEXT: sub r4, r6, r4
1212 ; P8LE-NEXT: addze r6, r9
1213 ; P8LE-NEXT: slwi r6, r6, 15
1214 ; P8LE-NEXT: mtvsrd v3, r4
1215 ; P8LE-NEXT: sub r5, r7, r5
1216 ; P8LE-NEXT: sub r3, r3, r6
1217 ; P8LE-NEXT: mtvsrd v4, r5
1218 ; P8LE-NEXT: mtvsrd v5, r3
1219 ; P8LE-NEXT: vmrghh v3, v3, v4
1220 ; P8LE-NEXT: vmrghh v2, v5, v2
1221 ; P8LE-NEXT: vmrglw v2, v3, v2
1224 ; P8BE-LABEL: dont_fold_urem_i16_smax:
1226 ; P8BE-NEXT: mfvsrd r3, v2
1227 ; P8BE-NEXT: lis r4, 24749
1228 ; P8BE-NEXT: lis r5, -19946
1229 ; P8BE-NEXT: li r9, 0
1230 ; P8BE-NEXT: ori r4, r4, 47143
1231 ; P8BE-NEXT: ori r5, r5, 17097
1232 ; P8BE-NEXT: mtvsrwz v2, r9
1233 ; P8BE-NEXT: clrldi r6, r3, 48
1234 ; P8BE-NEXT: rldicl r7, r3, 48, 48
1235 ; P8BE-NEXT: extsh r6, r6
1236 ; P8BE-NEXT: extsh r7, r7
1237 ; P8BE-NEXT: mulhw r4, r6, r4
1238 ; P8BE-NEXT: mulhw r5, r7, r5
1239 ; P8BE-NEXT: rldicl r3, r3, 32, 48
1240 ; P8BE-NEXT: extsh r3, r3
1241 ; P8BE-NEXT: srwi r8, r4, 31
1242 ; P8BE-NEXT: srawi r4, r4, 11
1243 ; P8BE-NEXT: add r5, r5, r7
1244 ; P8BE-NEXT: add r4, r4, r8
1245 ; P8BE-NEXT: srwi r8, r5, 31
1246 ; P8BE-NEXT: srawi r5, r5, 4
1247 ; P8BE-NEXT: mulli r4, r4, 5423
1248 ; P8BE-NEXT: add r5, r5, r8
1249 ; P8BE-NEXT: addis r8, r2, .LCPI5_0@toc@ha
1250 ; P8BE-NEXT: srawi r10, r3, 15
1251 ; P8BE-NEXT: mulli r5, r5, 23
1252 ; P8BE-NEXT: sub r4, r6, r4
1253 ; P8BE-NEXT: addi r6, r8, .LCPI5_0@toc@l
1254 ; P8BE-NEXT: addze r8, r10
1255 ; P8BE-NEXT: lxvw4x v3, 0, r6
1256 ; P8BE-NEXT: slwi r6, r8, 15
1257 ; P8BE-NEXT: mtvsrwz v4, r4
1258 ; P8BE-NEXT: sub r5, r7, r5
1259 ; P8BE-NEXT: sub r3, r3, r6
1260 ; P8BE-NEXT: mtvsrwz v5, r5
1261 ; P8BE-NEXT: mtvsrwz v0, r3
1262 ; P8BE-NEXT: vperm v4, v5, v4, v3
1263 ; P8BE-NEXT: vperm v2, v2, v0, v3
1264 ; P8BE-NEXT: vmrghw v2, v2, v4
1266 %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
1270 ; Don't fold i64 srem.
1271 define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) {
1272 ; P9LE-LABEL: dont_fold_srem_i64:
1274 ; P9LE-NEXT: lis r4, 12374
1275 ; P9LE-NEXT: mfvsrd r3, v3
1276 ; P9LE-NEXT: ori r4, r4, 56339
1277 ; P9LE-NEXT: rldic r4, r4, 33, 1
1278 ; P9LE-NEXT: oris r4, r4, 58853
1279 ; P9LE-NEXT: ori r4, r4, 6055
1280 ; P9LE-NEXT: mulhd r4, r3, r4
1281 ; P9LE-NEXT: rldicl r5, r4, 1, 63
1282 ; P9LE-NEXT: sradi r4, r4, 11
1283 ; P9LE-NEXT: add r4, r4, r5
1284 ; P9LE-NEXT: lis r5, 5698
1285 ; P9LE-NEXT: mulli r4, r4, 5423
1286 ; P9LE-NEXT: ori r5, r5, 51289
1287 ; P9LE-NEXT: rldic r5, r5, 35, 0
1288 ; P9LE-NEXT: oris r5, r5, 22795
1289 ; P9LE-NEXT: sub r3, r3, r4
1290 ; P9LE-NEXT: mfvsrld r4, v3
1291 ; P9LE-NEXT: ori r5, r5, 8549
1292 ; P9LE-NEXT: mulhd r5, r4, r5
1293 ; P9LE-NEXT: add r5, r5, r4
1294 ; P9LE-NEXT: rldicl r6, r5, 1, 63
1295 ; P9LE-NEXT: sradi r5, r5, 4
1296 ; P9LE-NEXT: add r5, r5, r6
1297 ; P9LE-NEXT: mulli r5, r5, 23
1298 ; P9LE-NEXT: sub r4, r4, r5
1299 ; P9LE-NEXT: mtvsrdd v3, r3, r4
1300 ; P9LE-NEXT: lis r4, 3206
1301 ; P9LE-NEXT: mfvsrd r3, v2
1302 ; P9LE-NEXT: ori r4, r4, 42889
1303 ; P9LE-NEXT: rldic r4, r4, 35, 1
1304 ; P9LE-NEXT: oris r4, r4, 1603
1305 ; P9LE-NEXT: ori r4, r4, 21445
1306 ; P9LE-NEXT: mulhd r4, r3, r4
1307 ; P9LE-NEXT: rldicl r5, r4, 1, 63
1308 ; P9LE-NEXT: sradi r4, r4, 8
1309 ; P9LE-NEXT: add r4, r4, r5
1310 ; P9LE-NEXT: mulli r4, r4, 654
1311 ; P9LE-NEXT: sub r3, r3, r4
1312 ; P9LE-NEXT: li r4, 0
1313 ; P9LE-NEXT: mtvsrdd v2, r3, r4
1316 ; P9BE-LABEL: dont_fold_srem_i64:
1318 ; P9BE-NEXT: lis r4, 12374
1319 ; P9BE-NEXT: mfvsrld r3, v3
1320 ; P9BE-NEXT: ori r4, r4, 56339
1321 ; P9BE-NEXT: rldic r4, r4, 33, 1
1322 ; P9BE-NEXT: oris r4, r4, 58853
1323 ; P9BE-NEXT: ori r4, r4, 6055
1324 ; P9BE-NEXT: mulhd r4, r3, r4
1325 ; P9BE-NEXT: rldicl r5, r4, 1, 63
1326 ; P9BE-NEXT: sradi r4, r4, 11
1327 ; P9BE-NEXT: add r4, r4, r5
1328 ; P9BE-NEXT: lis r5, 5698
1329 ; P9BE-NEXT: ori r5, r5, 51289
1330 ; P9BE-NEXT: mulli r4, r4, 5423
1331 ; P9BE-NEXT: rldic r5, r5, 35, 0
1332 ; P9BE-NEXT: oris r5, r5, 22795
1333 ; P9BE-NEXT: sub r3, r3, r4
1334 ; P9BE-NEXT: mfvsrd r4, v3
1335 ; P9BE-NEXT: ori r5, r5, 8549
1336 ; P9BE-NEXT: mulhd r5, r4, r5
1337 ; P9BE-NEXT: add r5, r5, r4
1338 ; P9BE-NEXT: rldicl r6, r5, 1, 63
1339 ; P9BE-NEXT: sradi r5, r5, 4
1340 ; P9BE-NEXT: add r5, r5, r6
1341 ; P9BE-NEXT: mulli r5, r5, 23
1342 ; P9BE-NEXT: sub r4, r4, r5
1343 ; P9BE-NEXT: mtvsrdd v3, r4, r3
1344 ; P9BE-NEXT: lis r4, 3206
1345 ; P9BE-NEXT: mfvsrld r3, v2
1346 ; P9BE-NEXT: ori r4, r4, 42889
1347 ; P9BE-NEXT: rldic r4, r4, 35, 1
1348 ; P9BE-NEXT: oris r4, r4, 1603
1349 ; P9BE-NEXT: ori r4, r4, 21445
1350 ; P9BE-NEXT: mulhd r4, r3, r4
1351 ; P9BE-NEXT: rldicl r5, r4, 1, 63
1352 ; P9BE-NEXT: sradi r4, r4, 8
1353 ; P9BE-NEXT: add r4, r4, r5
1354 ; P9BE-NEXT: mulli r4, r4, 654
1355 ; P9BE-NEXT: sub r3, r3, r4
1356 ; P9BE-NEXT: mtvsrdd v2, 0, r3
1359 ; P8LE-LABEL: dont_fold_srem_i64:
1361 ; P8LE-NEXT: lis r3, 12374
1362 ; P8LE-NEXT: lis r4, 5698
1363 ; P8LE-NEXT: lis r5, 3206
1364 ; P8LE-NEXT: xxswapd vs0, v3
1365 ; P8LE-NEXT: mfvsrd r6, v3
1366 ; P8LE-NEXT: ori r3, r3, 56339
1367 ; P8LE-NEXT: ori r4, r4, 51289
1368 ; P8LE-NEXT: ori r5, r5, 42889
1369 ; P8LE-NEXT: mfvsrd r7, v2
1370 ; P8LE-NEXT: rldic r3, r3, 33, 1
1371 ; P8LE-NEXT: rldic r4, r4, 35, 0
1372 ; P8LE-NEXT: rldic r5, r5, 35, 1
1373 ; P8LE-NEXT: oris r3, r3, 58853
1374 ; P8LE-NEXT: oris r4, r4, 22795
1375 ; P8LE-NEXT: mffprd r8, f0
1376 ; P8LE-NEXT: oris r5, r5, 1603
1377 ; P8LE-NEXT: ori r3, r3, 6055
1378 ; P8LE-NEXT: ori r4, r4, 8549
1379 ; P8LE-NEXT: ori r5, r5, 21445
1380 ; P8LE-NEXT: mulhd r3, r6, r3
1381 ; P8LE-NEXT: mulhd r5, r7, r5
1382 ; P8LE-NEXT: mulhd r4, r8, r4
1383 ; P8LE-NEXT: rldicl r9, r3, 1, 63
1384 ; P8LE-NEXT: sradi r3, r3, 11
1385 ; P8LE-NEXT: add r3, r3, r9
1386 ; P8LE-NEXT: rldicl r9, r5, 1, 63
1387 ; P8LE-NEXT: add r4, r4, r8
1388 ; P8LE-NEXT: sradi r5, r5, 8
1389 ; P8LE-NEXT: mulli r3, r3, 5423
1390 ; P8LE-NEXT: add r5, r5, r9
1391 ; P8LE-NEXT: rldicl r9, r4, 1, 63
1392 ; P8LE-NEXT: sradi r4, r4, 4
1393 ; P8LE-NEXT: mulli r5, r5, 654
1394 ; P8LE-NEXT: add r4, r4, r9
1395 ; P8LE-NEXT: mulli r4, r4, 23
1396 ; P8LE-NEXT: sub r3, r6, r3
1397 ; P8LE-NEXT: mtfprd f0, r3
1398 ; P8LE-NEXT: sub r5, r7, r5
1399 ; P8LE-NEXT: mtfprd f1, r5
1400 ; P8LE-NEXT: sub r3, r8, r4
1401 ; P8LE-NEXT: li r4, 0
1402 ; P8LE-NEXT: mtfprd f2, r3
1403 ; P8LE-NEXT: mtfprd f3, r4
1404 ; P8LE-NEXT: xxmrghd v3, vs0, vs2
1405 ; P8LE-NEXT: xxmrghd v2, vs1, vs3
1408 ; P8BE-LABEL: dont_fold_srem_i64:
1410 ; P8BE-NEXT: lis r4, 5698
1411 ; P8BE-NEXT: lis r3, 12374
1412 ; P8BE-NEXT: xxswapd vs0, v3
1413 ; P8BE-NEXT: lis r5, 3206
1414 ; P8BE-NEXT: xxswapd vs1, v2
1415 ; P8BE-NEXT: ori r4, r4, 51289
1416 ; P8BE-NEXT: ori r3, r3, 56339
1417 ; P8BE-NEXT: ori r5, r5, 42889
1418 ; P8BE-NEXT: mfvsrd r6, v3
1419 ; P8BE-NEXT: rldic r4, r4, 35, 0
1420 ; P8BE-NEXT: rldic r3, r3, 33, 1
1421 ; P8BE-NEXT: oris r4, r4, 22795
1422 ; P8BE-NEXT: rldic r5, r5, 35, 1
1423 ; P8BE-NEXT: oris r3, r3, 58853
1424 ; P8BE-NEXT: mffprd r7, f0
1425 ; P8BE-NEXT: ori r4, r4, 8549
1426 ; P8BE-NEXT: ori r3, r3, 6055
1427 ; P8BE-NEXT: oris r5, r5, 1603
1428 ; P8BE-NEXT: mffprd r8, f1
1429 ; P8BE-NEXT: mulhd r4, r6, r4
1430 ; P8BE-NEXT: mulhd r3, r7, r3
1431 ; P8BE-NEXT: ori r5, r5, 21445
1432 ; P8BE-NEXT: mulhd r5, r8, r5
1433 ; P8BE-NEXT: add r4, r4, r6
1434 ; P8BE-NEXT: rldicl r9, r3, 1, 63
1435 ; P8BE-NEXT: sradi r3, r3, 11
1436 ; P8BE-NEXT: rldicl r10, r4, 1, 63
1437 ; P8BE-NEXT: sradi r4, r4, 4
1438 ; P8BE-NEXT: add r3, r3, r9
1439 ; P8BE-NEXT: rldicl r9, r5, 1, 63
1440 ; P8BE-NEXT: add r4, r4, r10
1441 ; P8BE-NEXT: sradi r5, r5, 8
1442 ; P8BE-NEXT: mulli r3, r3, 5423
1443 ; P8BE-NEXT: add r5, r5, r9
1444 ; P8BE-NEXT: mulli r4, r4, 23
1445 ; P8BE-NEXT: mulli r5, r5, 654
1446 ; P8BE-NEXT: sub r3, r7, r3
1447 ; P8BE-NEXT: sub r4, r6, r4
1448 ; P8BE-NEXT: mtfprd f0, r3
1449 ; P8BE-NEXT: sub r3, r8, r5
1450 ; P8BE-NEXT: mtfprd f1, r4
1451 ; P8BE-NEXT: li r4, 0
1452 ; P8BE-NEXT: mtfprd f2, r3
1453 ; P8BE-NEXT: mtfprd f3, r4
1454 ; P8BE-NEXT: xxmrghd v3, vs1, vs0
1455 ; P8BE-NEXT: xxmrghd v2, vs3, vs2
1457 %1 = srem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>