1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
3 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9LE
4 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
5 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9BE
6 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
7 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8LE
8 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
9 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8BE
; fold_srem_vec_1: srem of <4 x i16> by four different constant divisors
; (95, -124, 98, -1003), none of which is a power of two.
; In the expected code for each run configuration every lane is handled in
; scalar registers: the halfword is sign-extended (extsh/extsw), multiplied by
; a per-divisor constant built with lis/ori (mulld), reduced with
; rldicl/srawi plus a sign-bit correction, and the resulting quotient is
; multiplied by the divisor (mulli) and subtracted from the lane (subf).
; The four remainders are merged back into v2 with vmrglh/vmrglw on the
; little-endian targets and vmrghh/vmrghw on the big-endian targets.
11 define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
12 ; P9LE-LABEL: fold_srem_vec_1:
15 ; P9LE-NEXT: vextuhrx r3, r3, v2
16 ; P9LE-NEXT: extsh r4, r3
17 ; P9LE-NEXT: lis r5, -21386
18 ; P9LE-NEXT: ori r5, r5, 37253
19 ; P9LE-NEXT: extsw r4, r4
20 ; P9LE-NEXT: mulld r5, r4, r5
21 ; P9LE-NEXT: rldicl r5, r5, 32, 32
22 ; P9LE-NEXT: add r4, r5, r4
23 ; P9LE-NEXT: srwi r5, r4, 31
24 ; P9LE-NEXT: srawi r4, r4, 6
25 ; P9LE-NEXT: add r4, r4, r5
26 ; P9LE-NEXT: lis r5, 31710
27 ; P9LE-NEXT: mulli r4, r4, 95
28 ; P9LE-NEXT: subf r3, r4, r3
29 ; P9LE-NEXT: mtvsrd f0, r3
31 ; P9LE-NEXT: vextuhrx r3, r3, v2
32 ; P9LE-NEXT: extsh r4, r3
33 ; P9LE-NEXT: extsw r4, r4
34 ; P9LE-NEXT: ori r5, r5, 63421
35 ; P9LE-NEXT: mulld r5, r4, r5
36 ; P9LE-NEXT: rldicl r5, r5, 32, 32
37 ; P9LE-NEXT: subf r4, r4, r5
38 ; P9LE-NEXT: srwi r5, r4, 31
39 ; P9LE-NEXT: srawi r4, r4, 6
40 ; P9LE-NEXT: add r4, r4, r5
41 ; P9LE-NEXT: lis r5, 21399
42 ; P9LE-NEXT: mulli r4, r4, -124
43 ; P9LE-NEXT: subf r3, r4, r3
44 ; P9LE-NEXT: xxswapd v3, vs0
45 ; P9LE-NEXT: mtvsrd f0, r3
47 ; P9LE-NEXT: vextuhrx r3, r3, v2
48 ; P9LE-NEXT: extsh r4, r3
49 ; P9LE-NEXT: extsw r4, r4
50 ; P9LE-NEXT: ori r5, r5, 33437
51 ; P9LE-NEXT: mulld r4, r4, r5
52 ; P9LE-NEXT: rldicl r5, r4, 1, 63
53 ; P9LE-NEXT: rldicl r4, r4, 32, 32
54 ; P9LE-NEXT: srawi r4, r4, 5
55 ; P9LE-NEXT: add r4, r4, r5
56 ; P9LE-NEXT: lis r5, -16728
57 ; P9LE-NEXT: mulli r4, r4, 98
58 ; P9LE-NEXT: subf r3, r4, r3
59 ; P9LE-NEXT: xxswapd v4, vs0
60 ; P9LE-NEXT: mtvsrd f0, r3
62 ; P9LE-NEXT: vextuhrx r3, r3, v2
63 ; P9LE-NEXT: extsh r4, r3
64 ; P9LE-NEXT: extsw r4, r4
65 ; P9LE-NEXT: ori r5, r5, 63249
66 ; P9LE-NEXT: mulld r4, r4, r5
67 ; P9LE-NEXT: rldicl r5, r4, 1, 63
68 ; P9LE-NEXT: rldicl r4, r4, 32, 32
69 ; P9LE-NEXT: srawi r4, r4, 8
70 ; P9LE-NEXT: add r4, r4, r5
71 ; P9LE-NEXT: mulli r4, r4, -1003
72 ; P9LE-NEXT: subf r3, r4, r3
73 ; P9LE-NEXT: vmrglh v3, v4, v3
74 ; P9LE-NEXT: xxswapd v4, vs0
75 ; P9LE-NEXT: mtvsrd f0, r3
76 ; P9LE-NEXT: xxswapd v2, vs0
77 ; P9LE-NEXT: vmrglh v2, v2, v4
78 ; P9LE-NEXT: vmrglw v2, v2, v3
81 ; P9BE-LABEL: fold_srem_vec_1:
84 ; P9BE-NEXT: vextuhlx r3, r3, v2
85 ; P9BE-NEXT: extsh r3, r3
86 ; P9BE-NEXT: lis r4, 31710
87 ; P9BE-NEXT: ori r4, r4, 63421
88 ; P9BE-NEXT: extsw r3, r3
89 ; P9BE-NEXT: mulld r4, r3, r4
90 ; P9BE-NEXT: rldicl r4, r4, 32, 32
91 ; P9BE-NEXT: subf r4, r3, r4
92 ; P9BE-NEXT: srwi r5, r4, 31
93 ; P9BE-NEXT: srawi r4, r4, 6
94 ; P9BE-NEXT: add r4, r4, r5
95 ; P9BE-NEXT: mulli r4, r4, -124
96 ; P9BE-NEXT: subf r3, r4, r3
97 ; P9BE-NEXT: lis r4, -21386
98 ; P9BE-NEXT: sldi r3, r3, 48
99 ; P9BE-NEXT: mtvsrd v3, r3
100 ; P9BE-NEXT: li r3, 0
101 ; P9BE-NEXT: vextuhlx r3, r3, v2
102 ; P9BE-NEXT: extsh r3, r3
103 ; P9BE-NEXT: extsw r3, r3
104 ; P9BE-NEXT: ori r4, r4, 37253
105 ; P9BE-NEXT: mulld r4, r3, r4
106 ; P9BE-NEXT: rldicl r4, r4, 32, 32
107 ; P9BE-NEXT: add r4, r4, r3
108 ; P9BE-NEXT: srwi r5, r4, 31
109 ; P9BE-NEXT: srawi r4, r4, 6
110 ; P9BE-NEXT: add r4, r4, r5
111 ; P9BE-NEXT: mulli r4, r4, 95
112 ; P9BE-NEXT: subf r3, r4, r3
113 ; P9BE-NEXT: lis r4, -16728
114 ; P9BE-NEXT: sldi r3, r3, 48
115 ; P9BE-NEXT: mtvsrd v4, r3
116 ; P9BE-NEXT: li r3, 6
117 ; P9BE-NEXT: vextuhlx r3, r3, v2
118 ; P9BE-NEXT: extsh r3, r3
119 ; P9BE-NEXT: extsw r3, r3
120 ; P9BE-NEXT: ori r4, r4, 63249
121 ; P9BE-NEXT: mulld r4, r3, r4
122 ; P9BE-NEXT: rldicl r5, r4, 1, 63
123 ; P9BE-NEXT: rldicl r4, r4, 32, 32
124 ; P9BE-NEXT: srawi r4, r4, 8
125 ; P9BE-NEXT: add r4, r4, r5
126 ; P9BE-NEXT: mulli r4, r4, -1003
127 ; P9BE-NEXT: subf r3, r4, r3
128 ; P9BE-NEXT: lis r4, 21399
129 ; P9BE-NEXT: sldi r3, r3, 48
130 ; P9BE-NEXT: vmrghh v3, v4, v3
131 ; P9BE-NEXT: mtvsrd v4, r3
132 ; P9BE-NEXT: li r3, 4
133 ; P9BE-NEXT: vextuhlx r3, r3, v2
134 ; P9BE-NEXT: extsh r3, r3
135 ; P9BE-NEXT: extsw r3, r3
136 ; P9BE-NEXT: ori r4, r4, 33437
137 ; P9BE-NEXT: mulld r4, r3, r4
138 ; P9BE-NEXT: rldicl r5, r4, 1, 63
139 ; P9BE-NEXT: rldicl r4, r4, 32, 32
140 ; P9BE-NEXT: srawi r4, r4, 5
141 ; P9BE-NEXT: add r4, r4, r5
142 ; P9BE-NEXT: mulli r4, r4, 98
143 ; P9BE-NEXT: subf r3, r4, r3
144 ; P9BE-NEXT: sldi r3, r3, 48
145 ; P9BE-NEXT: mtvsrd v2, r3
146 ; P9BE-NEXT: vmrghh v2, v2, v4
147 ; P9BE-NEXT: vmrghw v2, v3, v2
150 ; P8LE-LABEL: fold_srem_vec_1:
152 ; P8LE-NEXT: xxswapd vs0, v2
153 ; P8LE-NEXT: lis r4, 21399
154 ; P8LE-NEXT: lis r9, -16728
155 ; P8LE-NEXT: lis r11, -21386
156 ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
157 ; P8LE-NEXT: ori r4, r4, 33437
158 ; P8LE-NEXT: ori r9, r9, 63249
159 ; P8LE-NEXT: ori r11, r11, 37253
160 ; P8LE-NEXT: mfvsrd r5, f0
161 ; P8LE-NEXT: rldicl r3, r5, 32, 48
162 ; P8LE-NEXT: rldicl r6, r5, 16, 48
163 ; P8LE-NEXT: clrldi r7, r5, 48
164 ; P8LE-NEXT: extsh r8, r3
165 ; P8LE-NEXT: extsh r10, r6
166 ; P8LE-NEXT: rldicl r5, r5, 48, 48
167 ; P8LE-NEXT: extsw r8, r8
168 ; P8LE-NEXT: extsh r12, r7
169 ; P8LE-NEXT: extsw r10, r10
170 ; P8LE-NEXT: mulld r4, r8, r4
171 ; P8LE-NEXT: lis r8, 31710
172 ; P8LE-NEXT: extsh r0, r5
173 ; P8LE-NEXT: extsw r12, r12
174 ; P8LE-NEXT: mulld r9, r10, r9
175 ; P8LE-NEXT: ori r8, r8, 63421
176 ; P8LE-NEXT: extsw r10, r0
177 ; P8LE-NEXT: mulld r11, r12, r11
178 ; P8LE-NEXT: mulld r8, r10, r8
179 ; P8LE-NEXT: rldicl r0, r4, 1, 63
180 ; P8LE-NEXT: rldicl r4, r4, 32, 32
181 ; P8LE-NEXT: rldicl r30, r9, 1, 63
182 ; P8LE-NEXT: rldicl r9, r9, 32, 32
183 ; P8LE-NEXT: rldicl r11, r11, 32, 32
184 ; P8LE-NEXT: rldicl r8, r8, 32, 32
185 ; P8LE-NEXT: add r11, r11, r12
186 ; P8LE-NEXT: srawi r4, r4, 5
187 ; P8LE-NEXT: subf r8, r10, r8
188 ; P8LE-NEXT: srawi r9, r9, 8
189 ; P8LE-NEXT: srwi r10, r11, 31
190 ; P8LE-NEXT: add r4, r4, r0
191 ; P8LE-NEXT: srawi r11, r11, 6
192 ; P8LE-NEXT: add r9, r9, r30
193 ; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
194 ; P8LE-NEXT: add r10, r11, r10
195 ; P8LE-NEXT: srwi r11, r8, 31
196 ; P8LE-NEXT: srawi r8, r8, 6
197 ; P8LE-NEXT: mulli r4, r4, 98
198 ; P8LE-NEXT: mulli r9, r9, -1003
199 ; P8LE-NEXT: add r8, r8, r11
200 ; P8LE-NEXT: mulli r10, r10, 95
201 ; P8LE-NEXT: mulli r8, r8, -124
202 ; P8LE-NEXT: subf r3, r4, r3
203 ; P8LE-NEXT: subf r4, r9, r6
204 ; P8LE-NEXT: mtvsrd f0, r3
205 ; P8LE-NEXT: subf r3, r10, r7
206 ; P8LE-NEXT: mtvsrd f1, r4
207 ; P8LE-NEXT: subf r4, r8, r5
208 ; P8LE-NEXT: mtvsrd f2, r3
209 ; P8LE-NEXT: xxswapd v2, vs0
210 ; P8LE-NEXT: mtvsrd f3, r4
211 ; P8LE-NEXT: xxswapd v3, vs1
212 ; P8LE-NEXT: xxswapd v4, vs2
213 ; P8LE-NEXT: xxswapd v5, vs3
214 ; P8LE-NEXT: vmrglh v2, v3, v2
215 ; P8LE-NEXT: vmrglh v3, v5, v4
216 ; P8LE-NEXT: vmrglw v2, v2, v3
219 ; P8BE-LABEL: fold_srem_vec_1:
221 ; P8BE-NEXT: mfvsrd r4, v2
222 ; P8BE-NEXT: lis r3, -16728
223 ; P8BE-NEXT: lis r9, 31710
224 ; P8BE-NEXT: lis r8, 21399
225 ; P8BE-NEXT: lis r10, -21386
226 ; P8BE-NEXT: ori r3, r3, 63249
227 ; P8BE-NEXT: ori r9, r9, 63421
228 ; P8BE-NEXT: ori r8, r8, 33437
229 ; P8BE-NEXT: ori r10, r10, 37253
230 ; P8BE-NEXT: clrldi r5, r4, 48
231 ; P8BE-NEXT: rldicl r7, r4, 32, 48
232 ; P8BE-NEXT: rldicl r6, r4, 48, 48
233 ; P8BE-NEXT: rldicl r4, r4, 16, 48
234 ; P8BE-NEXT: extsh r5, r5
235 ; P8BE-NEXT: extsh r7, r7
236 ; P8BE-NEXT: extsh r6, r6
237 ; P8BE-NEXT: extsw r5, r5
238 ; P8BE-NEXT: extsh r4, r4
239 ; P8BE-NEXT: extsw r7, r7
240 ; P8BE-NEXT: extsw r6, r6
241 ; P8BE-NEXT: mulld r3, r5, r3
242 ; P8BE-NEXT: extsw r4, r4
243 ; P8BE-NEXT: mulld r9, r7, r9
244 ; P8BE-NEXT: mulld r8, r6, r8
245 ; P8BE-NEXT: mulld r10, r4, r10
246 ; P8BE-NEXT: rldicl r11, r3, 1, 63
247 ; P8BE-NEXT: rldicl r3, r3, 32, 32
248 ; P8BE-NEXT: rldicl r9, r9, 32, 32
249 ; P8BE-NEXT: rldicl r12, r8, 1, 63
250 ; P8BE-NEXT: rldicl r8, r8, 32, 32
251 ; P8BE-NEXT: rldicl r10, r10, 32, 32
252 ; P8BE-NEXT: subf r9, r7, r9
253 ; P8BE-NEXT: srawi r3, r3, 8
254 ; P8BE-NEXT: srawi r8, r8, 5
255 ; P8BE-NEXT: add r10, r10, r4
256 ; P8BE-NEXT: add r3, r3, r11
257 ; P8BE-NEXT: srwi r11, r9, 31
258 ; P8BE-NEXT: add r8, r8, r12
259 ; P8BE-NEXT: srawi r9, r9, 6
260 ; P8BE-NEXT: mulli r3, r3, -1003
261 ; P8BE-NEXT: add r9, r9, r11
262 ; P8BE-NEXT: srwi r11, r10, 31
263 ; P8BE-NEXT: srawi r10, r10, 6
264 ; P8BE-NEXT: mulli r8, r8, 98
265 ; P8BE-NEXT: add r10, r10, r11
266 ; P8BE-NEXT: mulli r9, r9, -124
267 ; P8BE-NEXT: mulli r10, r10, 95
268 ; P8BE-NEXT: subf r3, r3, r5
269 ; P8BE-NEXT: sldi r3, r3, 48
270 ; P8BE-NEXT: subf r5, r8, r6
271 ; P8BE-NEXT: mtvsrd v2, r3
272 ; P8BE-NEXT: subf r6, r9, r7
273 ; P8BE-NEXT: sldi r3, r5, 48
274 ; P8BE-NEXT: subf r4, r10, r4
275 ; P8BE-NEXT: mtvsrd v3, r3
276 ; P8BE-NEXT: sldi r3, r6, 48
277 ; P8BE-NEXT: sldi r4, r4, 48
278 ; P8BE-NEXT: mtvsrd v4, r3
279 ; P8BE-NEXT: mtvsrd v5, r4
280 ; P8BE-NEXT: vmrghh v2, v3, v2
281 ; P8BE-NEXT: vmrghh v3, v5, v4
282 ; P8BE-NEXT: vmrghw v2, v3, v2
284 %1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
; fold_srem_vec_2: srem of <4 x i16> by the splat constant 95.
; Because every lane uses the same divisor, the expected code builds the
; multiplier once (lis -21386 / ori 37253) and reuses that register for all
; four mulld instructions. Each lane is still reduced separately with the
; rldicl/add/srwi/srawi sequence followed by mulli 95 and subf, and the
; remainders are merged back into v2 with the halfword/word merge instructions.
288 define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
289 ; P9LE-LABEL: fold_srem_vec_2:
291 ; P9LE-NEXT: li r3, 0
292 ; P9LE-NEXT: vextuhrx r3, r3, v2
293 ; P9LE-NEXT: extsh r4, r3
294 ; P9LE-NEXT: lis r5, -21386
295 ; P9LE-NEXT: ori r5, r5, 37253
296 ; P9LE-NEXT: extsw r4, r4
297 ; P9LE-NEXT: mulld r6, r4, r5
298 ; P9LE-NEXT: rldicl r6, r6, 32, 32
299 ; P9LE-NEXT: add r4, r6, r4
300 ; P9LE-NEXT: srwi r6, r4, 31
301 ; P9LE-NEXT: srawi r4, r4, 6
302 ; P9LE-NEXT: add r4, r4, r6
303 ; P9LE-NEXT: mulli r4, r4, 95
304 ; P9LE-NEXT: subf r3, r4, r3
305 ; P9LE-NEXT: mtvsrd f0, r3
306 ; P9LE-NEXT: li r3, 2
307 ; P9LE-NEXT: vextuhrx r3, r3, v2
308 ; P9LE-NEXT: extsh r4, r3
309 ; P9LE-NEXT: extsw r4, r4
310 ; P9LE-NEXT: mulld r6, r4, r5
311 ; P9LE-NEXT: rldicl r6, r6, 32, 32
312 ; P9LE-NEXT: add r4, r6, r4
313 ; P9LE-NEXT: srwi r6, r4, 31
314 ; P9LE-NEXT: srawi r4, r4, 6
315 ; P9LE-NEXT: add r4, r4, r6
316 ; P9LE-NEXT: mulli r4, r4, 95
317 ; P9LE-NEXT: subf r3, r4, r3
318 ; P9LE-NEXT: xxswapd v3, vs0
319 ; P9LE-NEXT: mtvsrd f0, r3
320 ; P9LE-NEXT: li r3, 4
321 ; P9LE-NEXT: vextuhrx r3, r3, v2
322 ; P9LE-NEXT: extsh r4, r3
323 ; P9LE-NEXT: extsw r4, r4
324 ; P9LE-NEXT: mulld r6, r4, r5
325 ; P9LE-NEXT: rldicl r6, r6, 32, 32
326 ; P9LE-NEXT: add r4, r6, r4
327 ; P9LE-NEXT: srwi r6, r4, 31
328 ; P9LE-NEXT: srawi r4, r4, 6
329 ; P9LE-NEXT: add r4, r4, r6
330 ; P9LE-NEXT: mulli r4, r4, 95
331 ; P9LE-NEXT: subf r3, r4, r3
332 ; P9LE-NEXT: xxswapd v4, vs0
333 ; P9LE-NEXT: mtvsrd f0, r3
334 ; P9LE-NEXT: li r3, 6
335 ; P9LE-NEXT: vextuhrx r3, r3, v2
336 ; P9LE-NEXT: extsh r4, r3
337 ; P9LE-NEXT: extsw r4, r4
338 ; P9LE-NEXT: mulld r5, r4, r5
339 ; P9LE-NEXT: rldicl r5, r5, 32, 32
340 ; P9LE-NEXT: add r4, r5, r4
341 ; P9LE-NEXT: srwi r5, r4, 31
342 ; P9LE-NEXT: srawi r4, r4, 6
343 ; P9LE-NEXT: add r4, r4, r5
344 ; P9LE-NEXT: mulli r4, r4, 95
345 ; P9LE-NEXT: subf r3, r4, r3
346 ; P9LE-NEXT: vmrglh v3, v4, v3
347 ; P9LE-NEXT: xxswapd v4, vs0
348 ; P9LE-NEXT: mtvsrd f0, r3
349 ; P9LE-NEXT: xxswapd v2, vs0
350 ; P9LE-NEXT: vmrglh v2, v2, v4
351 ; P9LE-NEXT: vmrglw v2, v2, v3
354 ; P9BE-LABEL: fold_srem_vec_2:
356 ; P9BE-NEXT: li r3, 6
357 ; P9BE-NEXT: vextuhlx r3, r3, v2
358 ; P9BE-NEXT: extsh r3, r3
359 ; P9BE-NEXT: lis r4, -21386
360 ; P9BE-NEXT: ori r4, r4, 37253
361 ; P9BE-NEXT: extsw r3, r3
362 ; P9BE-NEXT: mulld r5, r3, r4
363 ; P9BE-NEXT: rldicl r5, r5, 32, 32
364 ; P9BE-NEXT: add r5, r5, r3
365 ; P9BE-NEXT: srwi r6, r5, 31
366 ; P9BE-NEXT: srawi r5, r5, 6
367 ; P9BE-NEXT: add r5, r5, r6
368 ; P9BE-NEXT: mulli r5, r5, 95
369 ; P9BE-NEXT: subf r3, r5, r3
370 ; P9BE-NEXT: sldi r3, r3, 48
371 ; P9BE-NEXT: mtvsrd v3, r3
372 ; P9BE-NEXT: li r3, 4
373 ; P9BE-NEXT: vextuhlx r3, r3, v2
374 ; P9BE-NEXT: extsh r3, r3
375 ; P9BE-NEXT: extsw r3, r3
376 ; P9BE-NEXT: mulld r5, r3, r4
377 ; P9BE-NEXT: rldicl r5, r5, 32, 32
378 ; P9BE-NEXT: add r5, r5, r3
379 ; P9BE-NEXT: srwi r6, r5, 31
380 ; P9BE-NEXT: srawi r5, r5, 6
381 ; P9BE-NEXT: add r5, r5, r6
382 ; P9BE-NEXT: mulli r5, r5, 95
383 ; P9BE-NEXT: subf r3, r5, r3
384 ; P9BE-NEXT: sldi r3, r3, 48
385 ; P9BE-NEXT: mtvsrd v4, r3
386 ; P9BE-NEXT: li r3, 2
387 ; P9BE-NEXT: vextuhlx r3, r3, v2
388 ; P9BE-NEXT: extsh r3, r3
389 ; P9BE-NEXT: extsw r3, r3
390 ; P9BE-NEXT: mulld r5, r3, r4
391 ; P9BE-NEXT: rldicl r5, r5, 32, 32
392 ; P9BE-NEXT: add r5, r5, r3
393 ; P9BE-NEXT: srwi r6, r5, 31
394 ; P9BE-NEXT: srawi r5, r5, 6
395 ; P9BE-NEXT: add r5, r5, r6
396 ; P9BE-NEXT: mulli r5, r5, 95
397 ; P9BE-NEXT: subf r3, r5, r3
398 ; P9BE-NEXT: sldi r3, r3, 48
399 ; P9BE-NEXT: vmrghh v3, v4, v3
400 ; P9BE-NEXT: mtvsrd v4, r3
401 ; P9BE-NEXT: li r3, 0
402 ; P9BE-NEXT: vextuhlx r3, r3, v2
403 ; P9BE-NEXT: extsh r3, r3
404 ; P9BE-NEXT: extsw r3, r3
405 ; P9BE-NEXT: mulld r4, r3, r4
406 ; P9BE-NEXT: rldicl r4, r4, 32, 32
407 ; P9BE-NEXT: add r4, r4, r3
408 ; P9BE-NEXT: srwi r5, r4, 31
409 ; P9BE-NEXT: srawi r4, r4, 6
410 ; P9BE-NEXT: add r4, r4, r5
411 ; P9BE-NEXT: mulli r4, r4, 95
412 ; P9BE-NEXT: subf r3, r4, r3
413 ; P9BE-NEXT: sldi r3, r3, 48
414 ; P9BE-NEXT: mtvsrd v2, r3
415 ; P9BE-NEXT: vmrghh v2, v2, v4
416 ; P9BE-NEXT: vmrghw v2, v2, v3
419 ; P8LE-LABEL: fold_srem_vec_2:
421 ; P8LE-NEXT: xxswapd vs0, v2
422 ; P8LE-NEXT: lis r4, -21386
423 ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
424 ; P8LE-NEXT: ori r4, r4, 37253
425 ; P8LE-NEXT: mfvsrd r5, f0
426 ; P8LE-NEXT: clrldi r3, r5, 48
427 ; P8LE-NEXT: rldicl r7, r5, 32, 48
428 ; P8LE-NEXT: extsh r8, r3
429 ; P8LE-NEXT: rldicl r6, r5, 48, 48
430 ; P8LE-NEXT: extsh r10, r7
431 ; P8LE-NEXT: rldicl r5, r5, 16, 48
432 ; P8LE-NEXT: extsw r8, r8
433 ; P8LE-NEXT: extsh r9, r6
434 ; P8LE-NEXT: extsw r10, r10
435 ; P8LE-NEXT: extsh r11, r5
436 ; P8LE-NEXT: mulld r12, r8, r4
437 ; P8LE-NEXT: extsw r9, r9
438 ; P8LE-NEXT: extsw r11, r11
439 ; P8LE-NEXT: mulld r30, r10, r4
440 ; P8LE-NEXT: mulld r0, r9, r4
441 ; P8LE-NEXT: mulld r4, r11, r4
442 ; P8LE-NEXT: rldicl r12, r12, 32, 32
443 ; P8LE-NEXT: add r8, r12, r8
444 ; P8LE-NEXT: rldicl r12, r30, 32, 32
445 ; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
446 ; P8LE-NEXT: rldicl r0, r0, 32, 32
447 ; P8LE-NEXT: rldicl r4, r4, 32, 32
448 ; P8LE-NEXT: add r10, r12, r10
449 ; P8LE-NEXT: add r9, r0, r9
450 ; P8LE-NEXT: srwi r0, r8, 31
451 ; P8LE-NEXT: add r4, r4, r11
452 ; P8LE-NEXT: srwi r11, r10, 31
453 ; P8LE-NEXT: srawi r8, r8, 6
454 ; P8LE-NEXT: srawi r10, r10, 6
455 ; P8LE-NEXT: srwi r12, r9, 31
456 ; P8LE-NEXT: add r8, r8, r0
457 ; P8LE-NEXT: srawi r9, r9, 6
458 ; P8LE-NEXT: add r10, r10, r11
459 ; P8LE-NEXT: srwi r11, r4, 31
460 ; P8LE-NEXT: srawi r4, r4, 6
461 ; P8LE-NEXT: add r9, r9, r12
462 ; P8LE-NEXT: mulli r8, r8, 95
463 ; P8LE-NEXT: add r4, r4, r11
464 ; P8LE-NEXT: mulli r9, r9, 95
465 ; P8LE-NEXT: mulli r10, r10, 95
466 ; P8LE-NEXT: mulli r4, r4, 95
467 ; P8LE-NEXT: subf r3, r8, r3
468 ; P8LE-NEXT: subf r6, r9, r6
469 ; P8LE-NEXT: mtvsrd f0, r3
470 ; P8LE-NEXT: subf r3, r10, r7
471 ; P8LE-NEXT: subf r4, r4, r5
472 ; P8LE-NEXT: mtvsrd f1, r6
473 ; P8LE-NEXT: mtvsrd f2, r3
474 ; P8LE-NEXT: xxswapd v2, vs0
475 ; P8LE-NEXT: mtvsrd f3, r4
476 ; P8LE-NEXT: xxswapd v3, vs1
477 ; P8LE-NEXT: xxswapd v4, vs2
478 ; P8LE-NEXT: xxswapd v5, vs3
479 ; P8LE-NEXT: vmrglh v2, v3, v2
480 ; P8LE-NEXT: vmrglh v3, v5, v4
481 ; P8LE-NEXT: vmrglw v2, v3, v2
484 ; P8BE-LABEL: fold_srem_vec_2:
486 ; P8BE-NEXT: mfvsrd r4, v2
487 ; P8BE-NEXT: lis r3, -21386
488 ; P8BE-NEXT: ori r3, r3, 37253
489 ; P8BE-NEXT: clrldi r5, r4, 48
490 ; P8BE-NEXT: rldicl r6, r4, 48, 48
491 ; P8BE-NEXT: extsh r5, r5
492 ; P8BE-NEXT: rldicl r7, r4, 32, 48
493 ; P8BE-NEXT: extsh r6, r6
494 ; P8BE-NEXT: extsw r5, r5
495 ; P8BE-NEXT: rldicl r4, r4, 16, 48
496 ; P8BE-NEXT: extsh r7, r7
497 ; P8BE-NEXT: extsw r6, r6
498 ; P8BE-NEXT: mulld r8, r5, r3
499 ; P8BE-NEXT: extsh r4, r4
500 ; P8BE-NEXT: extsw r7, r7
501 ; P8BE-NEXT: mulld r9, r6, r3
502 ; P8BE-NEXT: extsw r4, r4
503 ; P8BE-NEXT: mulld r10, r7, r3
504 ; P8BE-NEXT: mulld r3, r4, r3
505 ; P8BE-NEXT: rldicl r8, r8, 32, 32
506 ; P8BE-NEXT: rldicl r9, r9, 32, 32
507 ; P8BE-NEXT: add r8, r8, r5
508 ; P8BE-NEXT: rldicl r10, r10, 32, 32
509 ; P8BE-NEXT: add r9, r9, r6
510 ; P8BE-NEXT: srwi r11, r8, 31
511 ; P8BE-NEXT: srawi r8, r8, 6
512 ; P8BE-NEXT: rldicl r3, r3, 32, 32
513 ; P8BE-NEXT: add r10, r10, r7
514 ; P8BE-NEXT: add r8, r8, r11
515 ; P8BE-NEXT: srwi r11, r9, 31
516 ; P8BE-NEXT: add r3, r3, r4
517 ; P8BE-NEXT: srawi r9, r9, 6
518 ; P8BE-NEXT: mulli r8, r8, 95
519 ; P8BE-NEXT: add r9, r9, r11
520 ; P8BE-NEXT: srwi r11, r10, 31
521 ; P8BE-NEXT: srawi r10, r10, 6
522 ; P8BE-NEXT: mulli r9, r9, 95
523 ; P8BE-NEXT: add r10, r10, r11
524 ; P8BE-NEXT: srwi r11, r3, 31
525 ; P8BE-NEXT: srawi r3, r3, 6
526 ; P8BE-NEXT: mulli r10, r10, 95
527 ; P8BE-NEXT: subf r5, r8, r5
528 ; P8BE-NEXT: add r3, r3, r11
529 ; P8BE-NEXT: sldi r5, r5, 48
530 ; P8BE-NEXT: mulli r3, r3, 95
531 ; P8BE-NEXT: subf r6, r9, r6
532 ; P8BE-NEXT: mtvsrd v2, r5
533 ; P8BE-NEXT: sldi r6, r6, 48
534 ; P8BE-NEXT: subf r7, r10, r7
535 ; P8BE-NEXT: mtvsrd v3, r6
536 ; P8BE-NEXT: subf r3, r3, r4
537 ; P8BE-NEXT: sldi r4, r7, 48
538 ; P8BE-NEXT: vmrghh v2, v3, v2
539 ; P8BE-NEXT: sldi r3, r3, 48
540 ; P8BE-NEXT: mtvsrd v4, r4
541 ; P8BE-NEXT: mtvsrd v5, r3
542 ; P8BE-NEXT: vmrghh v3, v5, v4
543 ; P8BE-NEXT: vmrghw v2, v3, v2
545 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
550 ; Don't fold if we can combine srem with sdiv.
; combine_srem_sdiv computes both the srem and the sdiv of %x by the splat
; constant 95 and adds the two results. The expected code derives each lane's
; quotient once with the mulld/rldicl/add/srwi/srawi sequence and keeps it
; live: the remainder is formed from that quotient (mulli 95, then subf from
; the lane), the quotients themselves are moved into a second vector, and the
; remainder and quotient vectors are summed with vadduhm.
551 define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
552 ; P9LE-LABEL: combine_srem_sdiv:
554 ; P9LE-NEXT: li r3, 0
555 ; P9LE-NEXT: vextuhrx r3, r3, v2
556 ; P9LE-NEXT: extsh r4, r3
557 ; P9LE-NEXT: lis r5, -21386
558 ; P9LE-NEXT: ori r5, r5, 37253
559 ; P9LE-NEXT: extsw r4, r4
560 ; P9LE-NEXT: mulld r6, r4, r5
561 ; P9LE-NEXT: rldicl r6, r6, 32, 32
562 ; P9LE-NEXT: add r4, r6, r4
563 ; P9LE-NEXT: srwi r6, r4, 31
564 ; P9LE-NEXT: srawi r4, r4, 6
565 ; P9LE-NEXT: add r4, r4, r6
566 ; P9LE-NEXT: mulli r6, r4, 95
567 ; P9LE-NEXT: subf r3, r6, r3
568 ; P9LE-NEXT: mtvsrd f0, r3
569 ; P9LE-NEXT: li r3, 2
570 ; P9LE-NEXT: vextuhrx r3, r3, v2
571 ; P9LE-NEXT: extsh r6, r3
572 ; P9LE-NEXT: extsw r6, r6
573 ; P9LE-NEXT: mulld r7, r6, r5
574 ; P9LE-NEXT: rldicl r7, r7, 32, 32
575 ; P9LE-NEXT: add r6, r7, r6
576 ; P9LE-NEXT: srwi r7, r6, 31
577 ; P9LE-NEXT: srawi r6, r6, 6
578 ; P9LE-NEXT: add r6, r6, r7
579 ; P9LE-NEXT: mulli r7, r6, 95
580 ; P9LE-NEXT: subf r3, r7, r3
581 ; P9LE-NEXT: xxswapd v3, vs0
582 ; P9LE-NEXT: mtvsrd f0, r3
583 ; P9LE-NEXT: li r3, 4
584 ; P9LE-NEXT: vextuhrx r3, r3, v2
585 ; P9LE-NEXT: extsh r7, r3
586 ; P9LE-NEXT: extsw r7, r7
587 ; P9LE-NEXT: mulld r8, r7, r5
588 ; P9LE-NEXT: rldicl r8, r8, 32, 32
589 ; P9LE-NEXT: add r7, r8, r7
590 ; P9LE-NEXT: srwi r8, r7, 31
591 ; P9LE-NEXT: srawi r7, r7, 6
592 ; P9LE-NEXT: add r7, r7, r8
593 ; P9LE-NEXT: mulli r8, r7, 95
594 ; P9LE-NEXT: subf r3, r8, r3
595 ; P9LE-NEXT: xxswapd v4, vs0
596 ; P9LE-NEXT: mtvsrd f0, r3
597 ; P9LE-NEXT: li r3, 6
598 ; P9LE-NEXT: vextuhrx r3, r3, v2
599 ; P9LE-NEXT: extsh r8, r3
600 ; P9LE-NEXT: extsw r8, r8
601 ; P9LE-NEXT: mulld r5, r8, r5
602 ; P9LE-NEXT: rldicl r5, r5, 32, 32
603 ; P9LE-NEXT: add r5, r5, r8
604 ; P9LE-NEXT: srwi r8, r5, 31
605 ; P9LE-NEXT: srawi r5, r5, 6
606 ; P9LE-NEXT: add r5, r5, r8
607 ; P9LE-NEXT: mulli r8, r5, 95
608 ; P9LE-NEXT: subf r3, r8, r3
609 ; P9LE-NEXT: vmrglh v3, v4, v3
610 ; P9LE-NEXT: xxswapd v4, vs0
611 ; P9LE-NEXT: mtvsrd f0, r3
612 ; P9LE-NEXT: xxswapd v2, vs0
613 ; P9LE-NEXT: mtvsrd f0, r4
614 ; P9LE-NEXT: vmrglh v2, v2, v4
615 ; P9LE-NEXT: vmrglw v2, v2, v3
616 ; P9LE-NEXT: xxswapd v3, vs0
617 ; P9LE-NEXT: mtvsrd f0, r6
618 ; P9LE-NEXT: xxswapd v4, vs0
619 ; P9LE-NEXT: mtvsrd f0, r7
620 ; P9LE-NEXT: vmrglh v3, v4, v3
621 ; P9LE-NEXT: xxswapd v4, vs0
622 ; P9LE-NEXT: mtvsrd f0, r5
623 ; P9LE-NEXT: xxswapd v5, vs0
624 ; P9LE-NEXT: vmrglh v4, v5, v4
625 ; P9LE-NEXT: vmrglw v3, v4, v3
626 ; P9LE-NEXT: vadduhm v2, v2, v3
629 ; P9BE-LABEL: combine_srem_sdiv:
631 ; P9BE-NEXT: li r3, 6
632 ; P9BE-NEXT: vextuhlx r3, r3, v2
633 ; P9BE-NEXT: extsh r4, r3
634 ; P9BE-NEXT: lis r5, -21386
635 ; P9BE-NEXT: ori r5, r5, 37253
636 ; P9BE-NEXT: extsw r4, r4
637 ; P9BE-NEXT: mulld r6, r4, r5
638 ; P9BE-NEXT: rldicl r6, r6, 32, 32
639 ; P9BE-NEXT: add r4, r6, r4
640 ; P9BE-NEXT: srwi r6, r4, 31
641 ; P9BE-NEXT: srawi r4, r4, 6
642 ; P9BE-NEXT: add r4, r4, r6
643 ; P9BE-NEXT: mulli r6, r4, 95
644 ; P9BE-NEXT: subf r3, r6, r3
645 ; P9BE-NEXT: sldi r3, r3, 48
646 ; P9BE-NEXT: mtvsrd v3, r3
647 ; P9BE-NEXT: li r3, 4
648 ; P9BE-NEXT: vextuhlx r3, r3, v2
649 ; P9BE-NEXT: extsh r6, r3
650 ; P9BE-NEXT: extsw r6, r6
651 ; P9BE-NEXT: mulld r7, r6, r5
652 ; P9BE-NEXT: rldicl r7, r7, 32, 32
653 ; P9BE-NEXT: add r6, r7, r6
654 ; P9BE-NEXT: srwi r7, r6, 31
655 ; P9BE-NEXT: srawi r6, r6, 6
656 ; P9BE-NEXT: add r6, r6, r7
657 ; P9BE-NEXT: mulli r7, r6, 95
658 ; P9BE-NEXT: subf r3, r7, r3
659 ; P9BE-NEXT: sldi r3, r3, 48
660 ; P9BE-NEXT: mtvsrd v4, r3
661 ; P9BE-NEXT: li r3, 2
662 ; P9BE-NEXT: vextuhlx r3, r3, v2
663 ; P9BE-NEXT: extsh r7, r3
664 ; P9BE-NEXT: extsw r7, r7
665 ; P9BE-NEXT: mulld r8, r7, r5
666 ; P9BE-NEXT: rldicl r8, r8, 32, 32
667 ; P9BE-NEXT: add r7, r8, r7
668 ; P9BE-NEXT: srwi r8, r7, 31
669 ; P9BE-NEXT: srawi r7, r7, 6
670 ; P9BE-NEXT: add r7, r7, r8
671 ; P9BE-NEXT: mulli r8, r7, 95
672 ; P9BE-NEXT: subf r3, r8, r3
673 ; P9BE-NEXT: sldi r3, r3, 48
674 ; P9BE-NEXT: vmrghh v3, v4, v3
675 ; P9BE-NEXT: mtvsrd v4, r3
676 ; P9BE-NEXT: li r3, 0
677 ; P9BE-NEXT: vextuhlx r3, r3, v2
678 ; P9BE-NEXT: extsh r3, r3
679 ; P9BE-NEXT: extsw r3, r3
680 ; P9BE-NEXT: mulld r5, r3, r5
681 ; P9BE-NEXT: rldicl r5, r5, 32, 32
682 ; P9BE-NEXT: add r5, r5, r3
683 ; P9BE-NEXT: srwi r8, r5, 31
684 ; P9BE-NEXT: srawi r5, r5, 6
685 ; P9BE-NEXT: add r5, r5, r8
686 ; P9BE-NEXT: mulli r8, r5, 95
687 ; P9BE-NEXT: subf r3, r8, r3
688 ; P9BE-NEXT: sldi r3, r3, 48
689 ; P9BE-NEXT: mtvsrd v2, r3
690 ; P9BE-NEXT: sldi r3, r4, 48
691 ; P9BE-NEXT: vmrghh v2, v2, v4
692 ; P9BE-NEXT: vmrghw v2, v2, v3
693 ; P9BE-NEXT: mtvsrd v3, r3
694 ; P9BE-NEXT: sldi r3, r6, 48
695 ; P9BE-NEXT: mtvsrd v4, r3
696 ; P9BE-NEXT: sldi r3, r7, 48
697 ; P9BE-NEXT: vmrghh v3, v4, v3
698 ; P9BE-NEXT: mtvsrd v4, r3
699 ; P9BE-NEXT: sldi r3, r5, 48
700 ; P9BE-NEXT: mtvsrd v5, r3
701 ; P9BE-NEXT: vmrghh v4, v5, v4
702 ; P9BE-NEXT: vmrghw v3, v4, v3
703 ; P9BE-NEXT: vadduhm v2, v2, v3
706 ; P8LE-LABEL: combine_srem_sdiv:
708 ; P8LE-NEXT: xxswapd vs0, v2
709 ; P8LE-NEXT: lis r5, -21386
710 ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
711 ; P8LE-NEXT: ori r5, r5, 37253
712 ; P8LE-NEXT: mfvsrd r6, f0
713 ; P8LE-NEXT: clrldi r3, r6, 48
714 ; P8LE-NEXT: rldicl r4, r6, 48, 48
715 ; P8LE-NEXT: rldicl r7, r6, 32, 48
716 ; P8LE-NEXT: extsh r8, r3
717 ; P8LE-NEXT: extsh r9, r4
718 ; P8LE-NEXT: rldicl r6, r6, 16, 48
719 ; P8LE-NEXT: extsh r10, r7
720 ; P8LE-NEXT: extsw r8, r8
721 ; P8LE-NEXT: extsw r9, r9
722 ; P8LE-NEXT: extsh r11, r6
723 ; P8LE-NEXT: extsw r10, r10
724 ; P8LE-NEXT: mulld r12, r8, r5
725 ; P8LE-NEXT: extsw r11, r11
726 ; P8LE-NEXT: mulld r0, r9, r5
727 ; P8LE-NEXT: mulld r30, r10, r5
728 ; P8LE-NEXT: mulld r5, r11, r5
729 ; P8LE-NEXT: rldicl r12, r12, 32, 32
730 ; P8LE-NEXT: rldicl r0, r0, 32, 32
731 ; P8LE-NEXT: rldicl r30, r30, 32, 32
732 ; P8LE-NEXT: add r8, r12, r8
733 ; P8LE-NEXT: rldicl r5, r5, 32, 32
734 ; P8LE-NEXT: add r9, r0, r9
735 ; P8LE-NEXT: add r10, r30, r10
736 ; P8LE-NEXT: srwi r12, r8, 31
737 ; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
738 ; P8LE-NEXT: srawi r8, r8, 6
739 ; P8LE-NEXT: srawi r0, r9, 6
740 ; P8LE-NEXT: srwi r9, r9, 31
741 ; P8LE-NEXT: add r5, r5, r11
742 ; P8LE-NEXT: add r8, r8, r12
743 ; P8LE-NEXT: srawi r12, r10, 6
744 ; P8LE-NEXT: srwi r10, r10, 31
745 ; P8LE-NEXT: add r9, r0, r9
746 ; P8LE-NEXT: mulli r0, r8, 95
747 ; P8LE-NEXT: add r10, r12, r10
748 ; P8LE-NEXT: mtvsrd f0, r8
749 ; P8LE-NEXT: srwi r8, r5, 31
750 ; P8LE-NEXT: srawi r5, r5, 6
751 ; P8LE-NEXT: mulli r11, r9, 95
752 ; P8LE-NEXT: mtvsrd f1, r9
753 ; P8LE-NEXT: mulli r9, r10, 95
754 ; P8LE-NEXT: add r5, r5, r8
755 ; P8LE-NEXT: xxswapd v2, vs0
756 ; P8LE-NEXT: mtvsrd f2, r10
757 ; P8LE-NEXT: mtvsrd f3, r5
758 ; P8LE-NEXT: mulli r5, r5, 95
759 ; P8LE-NEXT: xxswapd v3, vs1
760 ; P8LE-NEXT: subf r3, r0, r3
761 ; P8LE-NEXT: xxswapd v1, vs2
762 ; P8LE-NEXT: mtvsrd f0, r3
763 ; P8LE-NEXT: subf r4, r11, r4
764 ; P8LE-NEXT: xxswapd v6, vs3
765 ; P8LE-NEXT: subf r3, r9, r7
766 ; P8LE-NEXT: mtvsrd f1, r4
767 ; P8LE-NEXT: mtvsrd f4, r3
768 ; P8LE-NEXT: subf r3, r5, r6
769 ; P8LE-NEXT: mtvsrd f5, r3
770 ; P8LE-NEXT: xxswapd v4, vs1
771 ; P8LE-NEXT: vmrglh v2, v3, v2
772 ; P8LE-NEXT: xxswapd v3, vs0
773 ; P8LE-NEXT: xxswapd v5, vs4
774 ; P8LE-NEXT: xxswapd v0, vs5
775 ; P8LE-NEXT: vmrglh v3, v4, v3
776 ; P8LE-NEXT: vmrglh v4, v0, v5
777 ; P8LE-NEXT: vmrglh v5, v6, v1
778 ; P8LE-NEXT: vmrglw v3, v4, v3
779 ; P8LE-NEXT: vmrglw v2, v5, v2
780 ; P8LE-NEXT: vadduhm v2, v3, v2
783 ; P8BE-LABEL: combine_srem_sdiv:
785 ; P8BE-NEXT: mfvsrd r6, v2
786 ; P8BE-NEXT: lis r5, -21386
787 ; P8BE-NEXT: ori r5, r5, 37253
788 ; P8BE-NEXT: clrldi r3, r6, 48
789 ; P8BE-NEXT: rldicl r4, r6, 48, 48
790 ; P8BE-NEXT: extsh r8, r3
791 ; P8BE-NEXT: rldicl r7, r6, 32, 48
792 ; P8BE-NEXT: extsh r9, r4
793 ; P8BE-NEXT: rldicl r6, r6, 16, 48
794 ; P8BE-NEXT: extsw r8, r8
795 ; P8BE-NEXT: extsh r10, r7
796 ; P8BE-NEXT: extsw r9, r9
797 ; P8BE-NEXT: extsh r6, r6
798 ; P8BE-NEXT: mulld r11, r8, r5
799 ; P8BE-NEXT: extsw r10, r10
800 ; P8BE-NEXT: extsw r6, r6
801 ; P8BE-NEXT: mulld r12, r9, r5
802 ; P8BE-NEXT: mulld r0, r10, r5
803 ; P8BE-NEXT: mulld r5, r6, r5
804 ; P8BE-NEXT: rldicl r11, r11, 32, 32
805 ; P8BE-NEXT: rldicl r12, r12, 32, 32
806 ; P8BE-NEXT: add r8, r11, r8
807 ; P8BE-NEXT: rldicl r0, r0, 32, 32
808 ; P8BE-NEXT: rldicl r5, r5, 32, 32
809 ; P8BE-NEXT: add r9, r12, r9
810 ; P8BE-NEXT: srawi r11, r8, 6
811 ; P8BE-NEXT: srwi r8, r8, 31
812 ; P8BE-NEXT: add r10, r0, r10
813 ; P8BE-NEXT: add r5, r5, r6
814 ; P8BE-NEXT: srawi r12, r9, 6
815 ; P8BE-NEXT: srwi r9, r9, 31
816 ; P8BE-NEXT: add r8, r11, r8
817 ; P8BE-NEXT: srawi r0, r10, 6
818 ; P8BE-NEXT: srawi r11, r5, 6
819 ; P8BE-NEXT: srwi r10, r10, 31
820 ; P8BE-NEXT: add r9, r12, r9
821 ; P8BE-NEXT: srwi r5, r5, 31
822 ; P8BE-NEXT: mulli r12, r8, 95
823 ; P8BE-NEXT: add r10, r0, r10
824 ; P8BE-NEXT: add r5, r11, r5
825 ; P8BE-NEXT: mulli r0, r9, 95
826 ; P8BE-NEXT: sldi r9, r9, 48
827 ; P8BE-NEXT: sldi r8, r8, 48
828 ; P8BE-NEXT: mtvsrd v3, r9
829 ; P8BE-NEXT: mulli r9, r5, 95
830 ; P8BE-NEXT: mtvsrd v2, r8
831 ; P8BE-NEXT: mulli r8, r10, 95
832 ; P8BE-NEXT: sldi r10, r10, 48
833 ; P8BE-NEXT: subf r3, r12, r3
834 ; P8BE-NEXT: mtvsrd v4, r10
835 ; P8BE-NEXT: subf r4, r0, r4
836 ; P8BE-NEXT: sldi r3, r3, 48
837 ; P8BE-NEXT: vmrghh v2, v3, v2
838 ; P8BE-NEXT: sldi r4, r4, 48
839 ; P8BE-NEXT: mtvsrd v3, r3
840 ; P8BE-NEXT: subf r3, r9, r6
841 ; P8BE-NEXT: subf r7, r8, r7
842 ; P8BE-NEXT: mtvsrd v5, r4
843 ; P8BE-NEXT: sldi r3, r3, 48
844 ; P8BE-NEXT: sldi r6, r7, 48
845 ; P8BE-NEXT: mtvsrd v1, r3
846 ; P8BE-NEXT: sldi r3, r5, 48
847 ; P8BE-NEXT: mtvsrd v0, r6
848 ; P8BE-NEXT: vmrghh v3, v5, v3
849 ; P8BE-NEXT: mtvsrd v5, r3
850 ; P8BE-NEXT: vmrghh v0, v1, v0
851 ; P8BE-NEXT: vmrghh v4, v5, v4
852 ; P8BE-NEXT: vmrghw v3, v0, v3
853 ; P8BE-NEXT: vmrghw v2, v4, v2
854 ; P8BE-NEXT: vadduhm v2, v3, v2
856 %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
857 %2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
858 %3 = add <4 x i16> %1, %2
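862 ; Don't use the multiply-based fold for lanes whose divisor is a power of two; those lanes are expected to use the srawi/addze/slwi shift sequence instead.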
863 define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
864 ; P9LE-LABEL: dont_fold_srem_power_of_two:
866 ; P9LE-NEXT: li r3, 0
867 ; P9LE-NEXT: vextuhrx r3, r3, v2
868 ; P9LE-NEXT: extsh r4, r3
869 ; P9LE-NEXT: srawi r4, r4, 6
870 ; P9LE-NEXT: addze r4, r4
871 ; P9LE-NEXT: slwi r4, r4, 6
872 ; P9LE-NEXT: subf r3, r4, r3
873 ; P9LE-NEXT: mtvsrd f0, r3
874 ; P9LE-NEXT: li r3, 2
875 ; P9LE-NEXT: vextuhrx r3, r3, v2
876 ; P9LE-NEXT: extsh r4, r3
877 ; P9LE-NEXT: srawi r4, r4, 5
878 ; P9LE-NEXT: addze r4, r4
879 ; P9LE-NEXT: slwi r4, r4, 5
880 ; P9LE-NEXT: subf r3, r4, r3
881 ; P9LE-NEXT: xxswapd v3, vs0
882 ; P9LE-NEXT: mtvsrd f0, r3
883 ; P9LE-NEXT: li r3, 6
884 ; P9LE-NEXT: vextuhrx r3, r3, v2
885 ; P9LE-NEXT: extsh r4, r3
886 ; P9LE-NEXT: lis r5, -21386
887 ; P9LE-NEXT: ori r5, r5, 37253
888 ; P9LE-NEXT: xxswapd v4, vs0
889 ; P9LE-NEXT: vmrglh v3, v4, v3
890 ; P9LE-NEXT: extsw r4, r4
891 ; P9LE-NEXT: mulld r5, r4, r5
892 ; P9LE-NEXT: rldicl r5, r5, 32, 32
893 ; P9LE-NEXT: add r4, r5, r4
894 ; P9LE-NEXT: srwi r5, r4, 31
895 ; P9LE-NEXT: srawi r4, r4, 6
896 ; P9LE-NEXT: add r4, r4, r5
897 ; P9LE-NEXT: mulli r4, r4, 95
898 ; P9LE-NEXT: subf r3, r4, r3
899 ; P9LE-NEXT: mtvsrd f0, r3
900 ; P9LE-NEXT: li r3, 4
901 ; P9LE-NEXT: vextuhrx r3, r3, v2
902 ; P9LE-NEXT: extsh r4, r3
903 ; P9LE-NEXT: srawi r4, r4, 3
904 ; P9LE-NEXT: addze r4, r4
905 ; P9LE-NEXT: slwi r4, r4, 3
906 ; P9LE-NEXT: subf r3, r4, r3
907 ; P9LE-NEXT: xxswapd v4, vs0
908 ; P9LE-NEXT: mtvsrd f0, r3
909 ; P9LE-NEXT: xxswapd v2, vs0
910 ; P9LE-NEXT: vmrglh v2, v4, v2
911 ; P9LE-NEXT: vmrglw v2, v2, v3
914 ; P9BE-LABEL: dont_fold_srem_power_of_two:
916 ; P9BE-NEXT: li r3, 2
917 ; P9BE-NEXT: vextuhlx r3, r3, v2
918 ; P9BE-NEXT: extsh r3, r3
919 ; P9BE-NEXT: srawi r4, r3, 5
920 ; P9BE-NEXT: addze r4, r4
921 ; P9BE-NEXT: slwi r4, r4, 5
922 ; P9BE-NEXT: subf r3, r4, r3
923 ; P9BE-NEXT: sldi r3, r3, 48
924 ; P9BE-NEXT: mtvsrd v3, r3
925 ; P9BE-NEXT: li r3, 0
926 ; P9BE-NEXT: vextuhlx r3, r3, v2
927 ; P9BE-NEXT: extsh r3, r3
928 ; P9BE-NEXT: srawi r4, r3, 6
929 ; P9BE-NEXT: addze r4, r4
930 ; P9BE-NEXT: slwi r4, r4, 6
931 ; P9BE-NEXT: subf r3, r4, r3
932 ; P9BE-NEXT: lis r4, -21386
933 ; P9BE-NEXT: sldi r3, r3, 48
934 ; P9BE-NEXT: mtvsrd v4, r3
935 ; P9BE-NEXT: li r3, 6
936 ; P9BE-NEXT: vextuhlx r3, r3, v2
937 ; P9BE-NEXT: extsh r3, r3
938 ; P9BE-NEXT: extsw r3, r3
939 ; P9BE-NEXT: ori r4, r4, 37253
940 ; P9BE-NEXT: mulld r4, r3, r4
941 ; P9BE-NEXT: rldicl r4, r4, 32, 32
942 ; P9BE-NEXT: add r4, r4, r3
943 ; P9BE-NEXT: srwi r5, r4, 31
944 ; P9BE-NEXT: srawi r4, r4, 6
945 ; P9BE-NEXT: add r4, r4, r5
946 ; P9BE-NEXT: mulli r4, r4, 95
947 ; P9BE-NEXT: subf r3, r4, r3
948 ; P9BE-NEXT: sldi r3, r3, 48
949 ; P9BE-NEXT: vmrghh v3, v4, v3
950 ; P9BE-NEXT: mtvsrd v4, r3
951 ; P9BE-NEXT: li r3, 4
952 ; P9BE-NEXT: vextuhlx r3, r3, v2
953 ; P9BE-NEXT: extsh r3, r3
954 ; P9BE-NEXT: srawi r4, r3, 3
955 ; P9BE-NEXT: addze r4, r4
956 ; P9BE-NEXT: slwi r4, r4, 3
957 ; P9BE-NEXT: subf r3, r4, r3
958 ; P9BE-NEXT: sldi r3, r3, 48
959 ; P9BE-NEXT: mtvsrd v2, r3
960 ; P9BE-NEXT: vmrghh v2, v2, v4
961 ; P9BE-NEXT: vmrghw v2, v3, v2
964 ; P8LE-LABEL: dont_fold_srem_power_of_two:
966 ; P8LE-NEXT: xxswapd vs0, v2
967 ; P8LE-NEXT: lis r3, -21386
968 ; P8LE-NEXT: ori r3, r3, 37253
969 ; P8LE-NEXT: mfvsrd r4, f0
970 ; P8LE-NEXT: rldicl r5, r4, 16, 48
971 ; P8LE-NEXT: clrldi r7, r4, 48
972 ; P8LE-NEXT: extsh r6, r5
973 ; P8LE-NEXT: extsh r8, r7
974 ; P8LE-NEXT: extsw r6, r6
975 ; P8LE-NEXT: rldicl r9, r4, 48, 48
976 ; P8LE-NEXT: mulld r3, r6, r3
977 ; P8LE-NEXT: srawi r8, r8, 6
978 ; P8LE-NEXT: extsh r10, r9
979 ; P8LE-NEXT: addze r8, r8
980 ; P8LE-NEXT: rldicl r4, r4, 32, 48
981 ; P8LE-NEXT: srawi r10, r10, 5
982 ; P8LE-NEXT: slwi r8, r8, 6
983 ; P8LE-NEXT: subf r7, r8, r7
984 ; P8LE-NEXT: rldicl r3, r3, 32, 32
985 ; P8LE-NEXT: mtvsrd f0, r7
986 ; P8LE-NEXT: add r3, r3, r6
987 ; P8LE-NEXT: addze r6, r10
988 ; P8LE-NEXT: srwi r10, r3, 31
989 ; P8LE-NEXT: srawi r3, r3, 6
990 ; P8LE-NEXT: slwi r6, r6, 5
991 ; P8LE-NEXT: xxswapd v2, vs0
992 ; P8LE-NEXT: add r3, r3, r10
993 ; P8LE-NEXT: extsh r10, r4
994 ; P8LE-NEXT: subf r6, r6, r9
995 ; P8LE-NEXT: mulli r3, r3, 95
996 ; P8LE-NEXT: srawi r8, r10, 3
997 ; P8LE-NEXT: mtvsrd f1, r6
998 ; P8LE-NEXT: addze r7, r8
999 ; P8LE-NEXT: xxswapd v3, vs1
1000 ; P8LE-NEXT: subf r3, r3, r5
1001 ; P8LE-NEXT: slwi r5, r7, 3
1002 ; P8LE-NEXT: subf r4, r5, r4
1003 ; P8LE-NEXT: mtvsrd f2, r3
1004 ; P8LE-NEXT: mtvsrd f3, r4
1005 ; P8LE-NEXT: xxswapd v4, vs2
1006 ; P8LE-NEXT: vmrglh v2, v3, v2
1007 ; P8LE-NEXT: xxswapd v5, vs3
1008 ; P8LE-NEXT: vmrglh v3, v4, v5
1009 ; P8LE-NEXT: vmrglw v2, v3, v2
1012 ; P8BE-LABEL: dont_fold_srem_power_of_two:
1014 ; P8BE-NEXT: mfvsrd r4, v2
1015 ; P8BE-NEXT: lis r3, -21386
1016 ; P8BE-NEXT: ori r3, r3, 37253
1017 ; P8BE-NEXT: clrldi r5, r4, 48
1018 ; P8BE-NEXT: rldicl r6, r4, 32, 48
1019 ; P8BE-NEXT: extsh r5, r5
1020 ; P8BE-NEXT: extsh r6, r6
1021 ; P8BE-NEXT: extsw r5, r5
1022 ; P8BE-NEXT: rldicl r7, r4, 16, 48
1023 ; P8BE-NEXT: mulld r3, r5, r3
1024 ; P8BE-NEXT: srawi r8, r6, 5
1025 ; P8BE-NEXT: extsh r7, r7
1026 ; P8BE-NEXT: addze r8, r8
1027 ; P8BE-NEXT: rldicl r4, r4, 48, 48
1028 ; P8BE-NEXT: srawi r9, r7, 6
1029 ; P8BE-NEXT: extsh r4, r4
1030 ; P8BE-NEXT: slwi r8, r8, 5
1031 ; P8BE-NEXT: addze r9, r9
1032 ; P8BE-NEXT: subf r6, r8, r6
1033 ; P8BE-NEXT: rldicl r3, r3, 32, 32
1034 ; P8BE-NEXT: slwi r8, r9, 6
1035 ; P8BE-NEXT: add r3, r3, r5
1036 ; P8BE-NEXT: subf r7, r8, r7
1037 ; P8BE-NEXT: srwi r10, r3, 31
1038 ; P8BE-NEXT: srawi r3, r3, 6
1039 ; P8BE-NEXT: add r3, r3, r10
1040 ; P8BE-NEXT: srawi r9, r4, 3
1041 ; P8BE-NEXT: mulli r3, r3, 95
1042 ; P8BE-NEXT: sldi r6, r6, 48
1043 ; P8BE-NEXT: addze r8, r9
1044 ; P8BE-NEXT: mtvsrd v2, r6
1045 ; P8BE-NEXT: slwi r6, r8, 3
1046 ; P8BE-NEXT: subf r4, r6, r4
1047 ; P8BE-NEXT: sldi r4, r4, 48
1048 ; P8BE-NEXT: subf r3, r3, r5
1049 ; P8BE-NEXT: sldi r5, r7, 48
1050 ; P8BE-NEXT: mtvsrd v5, r4
1051 ; P8BE-NEXT: sldi r3, r3, 48
1052 ; P8BE-NEXT: mtvsrd v3, r5
1053 ; P8BE-NEXT: mtvsrd v4, r3
1054 ; P8BE-NEXT: vmrghh v2, v3, v2
1055 ; P8BE-NEXT: vmrghh v3, v5, v4
1056 ; P8BE-NEXT: vmrghw v2, v2, v3
1058 %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
1062 ; Don't fold if the divisor is one. Lane 0 of the divisor vector is 1, so its
; remainder is always zero; the checks below expect that lane to be built from a
; constant zero (xxlxor on little-endian, li 0 on big-endian). The other lanes
; (654, 23, 5423) are each expected to go through a scalar magic-constant
; multiply, shift, mulli and subf sequence.
1063 define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
1064 ; P9LE-LABEL: dont_fold_srem_one:
1066 ; P9LE-NEXT: li r3, 2
1067 ; P9LE-NEXT: vextuhrx r3, r3, v2
1068 ; P9LE-NEXT: extsh r4, r3
1069 ; P9LE-NEXT: lis r5, -14230
1070 ; P9LE-NEXT: ori r5, r5, 30865
1071 ; P9LE-NEXT: extsw r4, r4
1072 ; P9LE-NEXT: mulld r5, r4, r5
1073 ; P9LE-NEXT: rldicl r5, r5, 32, 32
1074 ; P9LE-NEXT: xxlxor v4, v4, v4
1075 ; P9LE-NEXT: add r4, r5, r4
1076 ; P9LE-NEXT: srwi r5, r4, 31
1077 ; P9LE-NEXT: srawi r4, r4, 9
1078 ; P9LE-NEXT: add r4, r4, r5
1079 ; P9LE-NEXT: lis r5, -19946
1080 ; P9LE-NEXT: mulli r4, r4, 654
1081 ; P9LE-NEXT: subf r3, r4, r3
1082 ; P9LE-NEXT: mtvsrd f0, r3
1083 ; P9LE-NEXT: li r3, 4
1084 ; P9LE-NEXT: vextuhrx r3, r3, v2
1085 ; P9LE-NEXT: extsh r4, r3
1086 ; P9LE-NEXT: extsw r4, r4
1087 ; P9LE-NEXT: ori r5, r5, 17097
1088 ; P9LE-NEXT: mulld r5, r4, r5
1089 ; P9LE-NEXT: rldicl r5, r5, 32, 32
1090 ; P9LE-NEXT: add r4, r5, r4
1091 ; P9LE-NEXT: srwi r5, r4, 31
1092 ; P9LE-NEXT: srawi r4, r4, 4
1093 ; P9LE-NEXT: add r4, r4, r5
1094 ; P9LE-NEXT: lis r5, 24749
1095 ; P9LE-NEXT: mulli r4, r4, 23
1096 ; P9LE-NEXT: subf r3, r4, r3
1097 ; P9LE-NEXT: xxswapd v3, vs0
1098 ; P9LE-NEXT: mtvsrd f0, r3
1099 ; P9LE-NEXT: li r3, 6
1100 ; P9LE-NEXT: vextuhrx r3, r3, v2
1101 ; P9LE-NEXT: extsh r4, r3
1102 ; P9LE-NEXT: extsw r4, r4
1103 ; P9LE-NEXT: ori r5, r5, 47143
1104 ; P9LE-NEXT: mulld r4, r4, r5
1105 ; P9LE-NEXT: rldicl r5, r4, 1, 63
1106 ; P9LE-NEXT: rldicl r4, r4, 32, 32
1107 ; P9LE-NEXT: srawi r4, r4, 11
1108 ; P9LE-NEXT: add r4, r4, r5
1109 ; P9LE-NEXT: mulli r4, r4, 5423
1110 ; P9LE-NEXT: subf r3, r4, r3
1111 ; P9LE-NEXT: vmrglh v3, v3, v4
1112 ; P9LE-NEXT: xxswapd v4, vs0
1113 ; P9LE-NEXT: mtvsrd f0, r3
1114 ; P9LE-NEXT: xxswapd v2, vs0
1115 ; P9LE-NEXT: vmrglh v2, v2, v4
1116 ; P9LE-NEXT: vmrglw v2, v2, v3
1119 ; P9BE-LABEL: dont_fold_srem_one:
1121 ; P9BE-NEXT: li r3, 4
1122 ; P9BE-NEXT: vextuhlx r3, r3, v2
1123 ; P9BE-NEXT: extsh r3, r3
1124 ; P9BE-NEXT: lis r4, -19946
1125 ; P9BE-NEXT: ori r4, r4, 17097
1126 ; P9BE-NEXT: extsw r3, r3
1127 ; P9BE-NEXT: mulld r4, r3, r4
1128 ; P9BE-NEXT: rldicl r4, r4, 32, 32
1129 ; P9BE-NEXT: add r4, r4, r3
1130 ; P9BE-NEXT: srwi r5, r4, 31
1131 ; P9BE-NEXT: srawi r4, r4, 4
1132 ; P9BE-NEXT: add r4, r4, r5
1133 ; P9BE-NEXT: mulli r4, r4, 23
1134 ; P9BE-NEXT: subf r3, r4, r3
1135 ; P9BE-NEXT: lis r4, 24749
1136 ; P9BE-NEXT: sldi r3, r3, 48
1137 ; P9BE-NEXT: mtvsrd v3, r3
1138 ; P9BE-NEXT: li r3, 6
1139 ; P9BE-NEXT: vextuhlx r3, r3, v2
1140 ; P9BE-NEXT: extsh r3, r3
1141 ; P9BE-NEXT: extsw r3, r3
1142 ; P9BE-NEXT: ori r4, r4, 47143
1143 ; P9BE-NEXT: mulld r4, r3, r4
1144 ; P9BE-NEXT: rldicl r5, r4, 1, 63
1145 ; P9BE-NEXT: rldicl r4, r4, 32, 32
1146 ; P9BE-NEXT: srawi r4, r4, 11
1147 ; P9BE-NEXT: add r4, r4, r5
1148 ; P9BE-NEXT: mulli r4, r4, 5423
1149 ; P9BE-NEXT: subf r3, r4, r3
1150 ; P9BE-NEXT: lis r4, -14230
1151 ; P9BE-NEXT: sldi r3, r3, 48
1152 ; P9BE-NEXT: mtvsrd v4, r3
1153 ; P9BE-NEXT: li r3, 2
1154 ; P9BE-NEXT: vextuhlx r3, r3, v2
1155 ; P9BE-NEXT: extsh r3, r3
1156 ; P9BE-NEXT: extsw r3, r3
1157 ; P9BE-NEXT: ori r4, r4, 30865
1158 ; P9BE-NEXT: mulld r4, r3, r4
1159 ; P9BE-NEXT: rldicl r4, r4, 32, 32
1160 ; P9BE-NEXT: add r4, r4, r3
1161 ; P9BE-NEXT: srwi r5, r4, 31
1162 ; P9BE-NEXT: srawi r4, r4, 9
1163 ; P9BE-NEXT: add r4, r4, r5
1164 ; P9BE-NEXT: mulli r4, r4, 654
1165 ; P9BE-NEXT: subf r3, r4, r3
1166 ; P9BE-NEXT: sldi r3, r3, 48
1167 ; P9BE-NEXT: mtvsrd v2, r3
1168 ; P9BE-NEXT: li r3, 0
1169 ; P9BE-NEXT: sldi r3, r3, 48
1170 ; P9BE-NEXT: vmrghh v3, v3, v4
1171 ; P9BE-NEXT: mtvsrd v4, r3
1172 ; P9BE-NEXT: vmrghh v2, v4, v2
1173 ; P9BE-NEXT: vmrghw v2, v2, v3
1176 ; P8LE-LABEL: dont_fold_srem_one:
1178 ; P8LE-NEXT: xxswapd vs0, v2
1179 ; P8LE-NEXT: lis r3, 24749
1180 ; P8LE-NEXT: lis r8, -19946
1181 ; P8LE-NEXT: lis r10, -14230
1182 ; P8LE-NEXT: xxlxor v5, v5, v5
1183 ; P8LE-NEXT: ori r3, r3, 47143
1184 ; P8LE-NEXT: ori r8, r8, 17097
1185 ; P8LE-NEXT: mfvsrd r4, f0
1186 ; P8LE-NEXT: rldicl r5, r4, 16, 48
1187 ; P8LE-NEXT: rldicl r6, r4, 32, 48
1188 ; P8LE-NEXT: rldicl r4, r4, 48, 48
1189 ; P8LE-NEXT: extsh r7, r5
1190 ; P8LE-NEXT: extsh r9, r6
1191 ; P8LE-NEXT: extsw r7, r7
1192 ; P8LE-NEXT: extsh r11, r4
1193 ; P8LE-NEXT: extsw r9, r9
1194 ; P8LE-NEXT: mulld r3, r7, r3
1195 ; P8LE-NEXT: ori r7, r10, 30865
1196 ; P8LE-NEXT: extsw r10, r11
1197 ; P8LE-NEXT: mulld r8, r9, r8
1198 ; P8LE-NEXT: mulld r7, r10, r7
1199 ; P8LE-NEXT: rldicl r11, r3, 1, 63
1200 ; P8LE-NEXT: rldicl r3, r3, 32, 32
1201 ; P8LE-NEXT: rldicl r8, r8, 32, 32
1202 ; P8LE-NEXT: rldicl r7, r7, 32, 32
1203 ; P8LE-NEXT: add r8, r8, r9
1204 ; P8LE-NEXT: srawi r3, r3, 11
1205 ; P8LE-NEXT: add r7, r7, r10
1206 ; P8LE-NEXT: srwi r9, r8, 31
1207 ; P8LE-NEXT: srawi r8, r8, 4
1208 ; P8LE-NEXT: add r3, r3, r11
1209 ; P8LE-NEXT: add r8, r8, r9
1210 ; P8LE-NEXT: srwi r9, r7, 31
1211 ; P8LE-NEXT: srawi r7, r7, 9
1212 ; P8LE-NEXT: mulli r3, r3, 5423
1213 ; P8LE-NEXT: add r7, r7, r9
1214 ; P8LE-NEXT: mulli r8, r8, 23
1215 ; P8LE-NEXT: mulli r7, r7, 654
1216 ; P8LE-NEXT: subf r3, r3, r5
1217 ; P8LE-NEXT: mtvsrd f0, r3
1218 ; P8LE-NEXT: subf r3, r8, r6
1219 ; P8LE-NEXT: subf r4, r7, r4
1220 ; P8LE-NEXT: mtvsrd f1, r3
1221 ; P8LE-NEXT: mtvsrd f2, r4
1222 ; P8LE-NEXT: xxswapd v2, vs0
1223 ; P8LE-NEXT: xxswapd v3, vs1
1224 ; P8LE-NEXT: xxswapd v4, vs2
1225 ; P8LE-NEXT: vmrglh v2, v2, v3
1226 ; P8LE-NEXT: vmrglh v3, v4, v5
1227 ; P8LE-NEXT: vmrglw v2, v2, v3
1230 ; P8BE-LABEL: dont_fold_srem_one:
1232 ; P8BE-NEXT: mfvsrd r4, v2
1233 ; P8BE-NEXT: lis r3, 24749
1234 ; P8BE-NEXT: lis r7, -19946
1235 ; P8BE-NEXT: lis r8, -14230
1236 ; P8BE-NEXT: ori r3, r3, 47143
1237 ; P8BE-NEXT: ori r7, r7, 17097
1238 ; P8BE-NEXT: ori r8, r8, 30865
1239 ; P8BE-NEXT: clrldi r5, r4, 48
1240 ; P8BE-NEXT: rldicl r6, r4, 48, 48
1241 ; P8BE-NEXT: rldicl r4, r4, 32, 48
1242 ; P8BE-NEXT: extsh r5, r5
1243 ; P8BE-NEXT: extsh r6, r6
1244 ; P8BE-NEXT: extsh r4, r4
1245 ; P8BE-NEXT: extsw r5, r5
1246 ; P8BE-NEXT: extsw r6, r6
1247 ; P8BE-NEXT: extsw r4, r4
1248 ; P8BE-NEXT: mulld r3, r5, r3
1249 ; P8BE-NEXT: mulld r7, r6, r7
1250 ; P8BE-NEXT: mulld r8, r4, r8
1251 ; P8BE-NEXT: rldicl r9, r3, 1, 63
1252 ; P8BE-NEXT: rldicl r3, r3, 32, 32
1253 ; P8BE-NEXT: rldicl r7, r7, 32, 32
1254 ; P8BE-NEXT: rldicl r8, r8, 32, 32
1255 ; P8BE-NEXT: srawi r3, r3, 11
1256 ; P8BE-NEXT: add r7, r7, r6
1257 ; P8BE-NEXT: add r8, r8, r4
1258 ; P8BE-NEXT: add r3, r3, r9
1259 ; P8BE-NEXT: srwi r9, r7, 31
1260 ; P8BE-NEXT: srawi r7, r7, 4
1261 ; P8BE-NEXT: mulli r3, r3, 5423
1262 ; P8BE-NEXT: add r7, r7, r9
1263 ; P8BE-NEXT: srwi r9, r8, 31
1264 ; P8BE-NEXT: srawi r8, r8, 9
1265 ; P8BE-NEXT: mulli r7, r7, 23
1266 ; P8BE-NEXT: add r8, r8, r9
1267 ; P8BE-NEXT: li r9, 0
1268 ; P8BE-NEXT: mulli r8, r8, 654
1269 ; P8BE-NEXT: subf r3, r3, r5
1270 ; P8BE-NEXT: sldi r5, r9, 48
1271 ; P8BE-NEXT: sldi r3, r3, 48
1272 ; P8BE-NEXT: mtvsrd v2, r5
1273 ; P8BE-NEXT: subf r5, r7, r6
1274 ; P8BE-NEXT: mtvsrd v3, r3
1275 ; P8BE-NEXT: sldi r3, r5, 48
1276 ; P8BE-NEXT: subf r4, r8, r4
1277 ; P8BE-NEXT: mtvsrd v4, r3
1278 ; P8BE-NEXT: sldi r4, r4, 48
1279 ; P8BE-NEXT: mtvsrd v5, r4
1280 ; P8BE-NEXT: vmrghh v3, v4, v3
1281 ; P8BE-NEXT: vmrghh v2, v2, v5
1282 ; P8BE-NEXT: vmrghw v2, v2, v3
1284 %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
1288 ; Don't fold if the divisor is 2^15. Despite the function name, the operation is
; srem, and the constant i16 32768 has bit pattern 0x8000, which srem reads as
; the signed value -32768 (the minimum i16). The checks below expect that lane
; to use the power-of-two sequence (srawi 15, addze, slwi 15, subf), the
; divisor-1 lane to be a constant zero, and the 23 and 5423 lanes to use the
; magic-constant multiply expansion.
1289 define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
1290 ; P9LE-LABEL: dont_fold_urem_i16_smax:
1292 ; P9LE-NEXT: li r3, 4
1293 ; P9LE-NEXT: vextuhrx r3, r3, v2
1294 ; P9LE-NEXT: extsh r4, r3
1295 ; P9LE-NEXT: lis r5, -19946
1296 ; P9LE-NEXT: ori r5, r5, 17097
1297 ; P9LE-NEXT: extsw r4, r4
1298 ; P9LE-NEXT: mulld r5, r4, r5
1299 ; P9LE-NEXT: rldicl r5, r5, 32, 32
1300 ; P9LE-NEXT: add r4, r5, r4
1301 ; P9LE-NEXT: srwi r5, r4, 31
1302 ; P9LE-NEXT: srawi r4, r4, 4
1303 ; P9LE-NEXT: add r4, r4, r5
1304 ; P9LE-NEXT: lis r5, 24749
1305 ; P9LE-NEXT: mulli r4, r4, 23
1306 ; P9LE-NEXT: subf r3, r4, r3
1307 ; P9LE-NEXT: mtvsrd f0, r3
1308 ; P9LE-NEXT: li r3, 6
1309 ; P9LE-NEXT: vextuhrx r3, r3, v2
1310 ; P9LE-NEXT: extsh r4, r3
1311 ; P9LE-NEXT: extsw r4, r4
1312 ; P9LE-NEXT: ori r5, r5, 47143
1313 ; P9LE-NEXT: mulld r4, r4, r5
1314 ; P9LE-NEXT: rldicl r5, r4, 1, 63
1315 ; P9LE-NEXT: rldicl r4, r4, 32, 32
1316 ; P9LE-NEXT: srawi r4, r4, 11
1317 ; P9LE-NEXT: add r4, r4, r5
1318 ; P9LE-NEXT: mulli r4, r4, 5423
1319 ; P9LE-NEXT: subf r3, r4, r3
1320 ; P9LE-NEXT: xxswapd v3, vs0
1321 ; P9LE-NEXT: mtvsrd f0, r3
1322 ; P9LE-NEXT: li r3, 2
1323 ; P9LE-NEXT: vextuhrx r3, r3, v2
1324 ; P9LE-NEXT: extsh r4, r3
1325 ; P9LE-NEXT: srawi r4, r4, 15
1326 ; P9LE-NEXT: addze r4, r4
1327 ; P9LE-NEXT: slwi r4, r4, 15
1328 ; P9LE-NEXT: subf r3, r4, r3
1329 ; P9LE-NEXT: xxswapd v4, vs0
1330 ; P9LE-NEXT: mtvsrd f0, r3
1331 ; P9LE-NEXT: xxswapd v2, vs0
1332 ; P9LE-NEXT: vmrglh v3, v4, v3
1333 ; P9LE-NEXT: xxlxor v4, v4, v4
1334 ; P9LE-NEXT: vmrglh v2, v2, v4
1335 ; P9LE-NEXT: vmrglw v2, v3, v2
1338 ; P9BE-LABEL: dont_fold_urem_i16_smax:
1340 ; P9BE-NEXT: li r3, 4
1341 ; P9BE-NEXT: vextuhlx r3, r3, v2
1342 ; P9BE-NEXT: extsh r3, r3
1343 ; P9BE-NEXT: lis r4, -19946
1344 ; P9BE-NEXT: ori r4, r4, 17097
1345 ; P9BE-NEXT: extsw r3, r3
1346 ; P9BE-NEXT: mulld r4, r3, r4
1347 ; P9BE-NEXT: rldicl r4, r4, 32, 32
1348 ; P9BE-NEXT: add r4, r4, r3
1349 ; P9BE-NEXT: srwi r5, r4, 31
1350 ; P9BE-NEXT: srawi r4, r4, 4
1351 ; P9BE-NEXT: add r4, r4, r5
1352 ; P9BE-NEXT: mulli r4, r4, 23
1353 ; P9BE-NEXT: subf r3, r4, r3
1354 ; P9BE-NEXT: lis r4, 24749
1355 ; P9BE-NEXT: sldi r3, r3, 48
1356 ; P9BE-NEXT: mtvsrd v3, r3
1357 ; P9BE-NEXT: li r3, 6
1358 ; P9BE-NEXT: vextuhlx r3, r3, v2
1359 ; P9BE-NEXT: extsh r3, r3
1360 ; P9BE-NEXT: extsw r3, r3
1361 ; P9BE-NEXT: ori r4, r4, 47143
1362 ; P9BE-NEXT: mulld r4, r3, r4
1363 ; P9BE-NEXT: rldicl r5, r4, 1, 63
1364 ; P9BE-NEXT: rldicl r4, r4, 32, 32
1365 ; P9BE-NEXT: srawi r4, r4, 11
1366 ; P9BE-NEXT: add r4, r4, r5
1367 ; P9BE-NEXT: mulli r4, r4, 5423
1368 ; P9BE-NEXT: subf r3, r4, r3
1369 ; P9BE-NEXT: sldi r3, r3, 48
1370 ; P9BE-NEXT: mtvsrd v4, r3
1371 ; P9BE-NEXT: li r3, 2
1372 ; P9BE-NEXT: vextuhlx r3, r3, v2
1373 ; P9BE-NEXT: extsh r3, r3
1374 ; P9BE-NEXT: srawi r4, r3, 15
1375 ; P9BE-NEXT: addze r4, r4
1376 ; P9BE-NEXT: slwi r4, r4, 15
1377 ; P9BE-NEXT: subf r3, r4, r3
1378 ; P9BE-NEXT: sldi r3, r3, 48
1379 ; P9BE-NEXT: mtvsrd v2, r3
1380 ; P9BE-NEXT: li r3, 0
1381 ; P9BE-NEXT: sldi r3, r3, 48
1382 ; P9BE-NEXT: vmrghh v3, v3, v4
1383 ; P9BE-NEXT: mtvsrd v4, r3
1384 ; P9BE-NEXT: vmrghh v2, v4, v2
1385 ; P9BE-NEXT: vmrghw v2, v2, v3
1388 ; P8LE-LABEL: dont_fold_urem_i16_smax:
1390 ; P8LE-NEXT: xxswapd vs0, v2
1391 ; P8LE-NEXT: lis r6, 24749
1392 ; P8LE-NEXT: lis r7, -19946
1393 ; P8LE-NEXT: xxlxor v5, v5, v5
1394 ; P8LE-NEXT: ori r6, r6, 47143
1395 ; P8LE-NEXT: ori r7, r7, 17097
1396 ; P8LE-NEXT: mfvsrd r3, f0
1397 ; P8LE-NEXT: rldicl r4, r3, 16, 48
1398 ; P8LE-NEXT: rldicl r5, r3, 32, 48
1399 ; P8LE-NEXT: extsh r8, r4
1400 ; P8LE-NEXT: extsh r9, r5
1401 ; P8LE-NEXT: extsw r8, r8
1402 ; P8LE-NEXT: extsw r9, r9
1403 ; P8LE-NEXT: mulld r6, r8, r6
1404 ; P8LE-NEXT: mulld r7, r9, r7
1405 ; P8LE-NEXT: rldicl r3, r3, 48, 48
1406 ; P8LE-NEXT: rldicl r8, r6, 32, 32
1407 ; P8LE-NEXT: rldicl r7, r7, 32, 32
1408 ; P8LE-NEXT: rldicl r6, r6, 1, 63
1409 ; P8LE-NEXT: srawi r8, r8, 11
1410 ; P8LE-NEXT: add r7, r7, r9
1411 ; P8LE-NEXT: add r6, r8, r6
1412 ; P8LE-NEXT: srwi r8, r7, 31
1413 ; P8LE-NEXT: srawi r7, r7, 4
1414 ; P8LE-NEXT: mulli r6, r6, 5423
1415 ; P8LE-NEXT: add r7, r7, r8
1416 ; P8LE-NEXT: extsh r8, r3
1417 ; P8LE-NEXT: mulli r7, r7, 23
1418 ; P8LE-NEXT: srawi r8, r8, 15
1419 ; P8LE-NEXT: subf r4, r6, r4
1420 ; P8LE-NEXT: addze r6, r8
1421 ; P8LE-NEXT: mtvsrd f0, r4
1422 ; P8LE-NEXT: slwi r4, r6, 15
1423 ; P8LE-NEXT: subf r5, r7, r5
1424 ; P8LE-NEXT: subf r3, r4, r3
1425 ; P8LE-NEXT: mtvsrd f1, r5
1426 ; P8LE-NEXT: xxswapd v2, vs0
1427 ; P8LE-NEXT: mtvsrd f2, r3
1428 ; P8LE-NEXT: xxswapd v3, vs1
1429 ; P8LE-NEXT: xxswapd v4, vs2
1430 ; P8LE-NEXT: vmrglh v2, v2, v3
1431 ; P8LE-NEXT: vmrglh v3, v4, v5
1432 ; P8LE-NEXT: vmrglw v2, v2, v3
1435 ; P8BE-LABEL: dont_fold_urem_i16_smax:
1437 ; P8BE-NEXT: mfvsrd r4, v2
1438 ; P8BE-NEXT: lis r3, 24749
1439 ; P8BE-NEXT: lis r7, -19946
1440 ; P8BE-NEXT: ori r3, r3, 47143
1441 ; P8BE-NEXT: ori r7, r7, 17097
1442 ; P8BE-NEXT: clrldi r5, r4, 48
1443 ; P8BE-NEXT: rldicl r6, r4, 48, 48
1444 ; P8BE-NEXT: extsh r5, r5
1445 ; P8BE-NEXT: extsh r6, r6
1446 ; P8BE-NEXT: extsw r5, r5
1447 ; P8BE-NEXT: extsw r6, r6
1448 ; P8BE-NEXT: mulld r3, r5, r3
1449 ; P8BE-NEXT: mulld r7, r6, r7
1450 ; P8BE-NEXT: rldicl r4, r4, 32, 48
1451 ; P8BE-NEXT: extsh r4, r4
1452 ; P8BE-NEXT: rldicl r8, r3, 1, 63
1453 ; P8BE-NEXT: rldicl r3, r3, 32, 32
1454 ; P8BE-NEXT: rldicl r7, r7, 32, 32
1455 ; P8BE-NEXT: srawi r3, r3, 11
1456 ; P8BE-NEXT: add r7, r7, r6
1457 ; P8BE-NEXT: add r3, r3, r8
1458 ; P8BE-NEXT: srwi r8, r7, 31
1459 ; P8BE-NEXT: srawi r7, r7, 4
1460 ; P8BE-NEXT: mulli r3, r3, 5423
1461 ; P8BE-NEXT: add r7, r7, r8
1462 ; P8BE-NEXT: li r8, 0
1463 ; P8BE-NEXT: mulli r7, r7, 23
1464 ; P8BE-NEXT: srawi r9, r4, 15
1465 ; P8BE-NEXT: subf r3, r3, r5
1466 ; P8BE-NEXT: sldi r5, r8, 48
1467 ; P8BE-NEXT: addze r8, r9
1468 ; P8BE-NEXT: mtvsrd v2, r5
1469 ; P8BE-NEXT: subf r5, r7, r6
1470 ; P8BE-NEXT: slwi r6, r8, 15
1471 ; P8BE-NEXT: sldi r3, r3, 48
1472 ; P8BE-NEXT: subf r4, r6, r4
1473 ; P8BE-NEXT: mtvsrd v3, r3
1474 ; P8BE-NEXT: sldi r3, r5, 48
1475 ; P8BE-NEXT: sldi r4, r4, 48
1476 ; P8BE-NEXT: mtvsrd v4, r3
1477 ; P8BE-NEXT: mtvsrd v5, r4
1478 ; P8BE-NEXT: vmrghh v3, v4, v3
1479 ; P8BE-NEXT: vmrghh v2, v2, v5
1480 ; P8BE-NEXT: vmrghw v2, v2, v3
1482 %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
1486 ; Don't fold i64 srem. The divisors are <1, 654, 23, 5423> with i64 elements.
; The checks below expect each of the 654, 23 and 5423 lanes to be computed in
; general-purpose registers with mulhd against a 64-bit magic constant followed
; by sradi, mulli and sub, and the divisor-1 lane to be a constant zero.
1487 define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) {
1488 ; P9LE-LABEL: dont_fold_srem_i64:
1490 ; P9LE-NEXT: lis r4, 24749
1491 ; P9LE-NEXT: ori r4, r4, 47142
1492 ; P9LE-NEXT: sldi r4, r4, 32
1493 ; P9LE-NEXT: oris r4, r4, 58853
1494 ; P9LE-NEXT: mfvsrd r3, v3
1495 ; P9LE-NEXT: ori r4, r4, 6055
1496 ; P9LE-NEXT: mulhd r4, r3, r4
1497 ; P9LE-NEXT: rldicl r5, r4, 1, 63
1498 ; P9LE-NEXT: sradi r4, r4, 11
1499 ; P9LE-NEXT: add r4, r4, r5
1500 ; P9LE-NEXT: lis r5, -19946
1501 ; P9LE-NEXT: mulli r4, r4, 5423
1502 ; P9LE-NEXT: ori r5, r5, 17096
1503 ; P9LE-NEXT: sldi r5, r5, 32
1504 ; P9LE-NEXT: oris r5, r5, 22795
1505 ; P9LE-NEXT: sub r3, r3, r4
1506 ; P9LE-NEXT: mfvsrld r4, v3
1507 ; P9LE-NEXT: ori r5, r5, 8549
1508 ; P9LE-NEXT: mulhd r5, r4, r5
1509 ; P9LE-NEXT: add r5, r5, r4
1510 ; P9LE-NEXT: rldicl r6, r5, 1, 63
1511 ; P9LE-NEXT: sradi r5, r5, 4
1512 ; P9LE-NEXT: add r5, r5, r6
1513 ; P9LE-NEXT: mulli r5, r5, 23
1514 ; P9LE-NEXT: sub r4, r4, r5
1515 ; P9LE-NEXT: mtvsrdd v3, r3, r4
1516 ; P9LE-NEXT: lis r4, 25653
1517 ; P9LE-NEXT: ori r4, r4, 15432
1518 ; P9LE-NEXT: sldi r4, r4, 32
1519 ; P9LE-NEXT: oris r4, r4, 1603
1520 ; P9LE-NEXT: mfvsrd r3, v2
1521 ; P9LE-NEXT: ori r4, r4, 21445
1522 ; P9LE-NEXT: mulhd r4, r3, r4
1523 ; P9LE-NEXT: rldicl r5, r4, 1, 63
1524 ; P9LE-NEXT: sradi r4, r4, 8
1525 ; P9LE-NEXT: add r4, r4, r5
1526 ; P9LE-NEXT: mulli r4, r4, 654
1527 ; P9LE-NEXT: sub r3, r3, r4
1528 ; P9LE-NEXT: li r4, 0
1529 ; P9LE-NEXT: mtvsrdd v2, r3, r4
1532 ; P9BE-LABEL: dont_fold_srem_i64:
1534 ; P9BE-NEXT: lis r4, 24749
1535 ; P9BE-NEXT: ori r4, r4, 47142
1536 ; P9BE-NEXT: sldi r4, r4, 32
1537 ; P9BE-NEXT: oris r4, r4, 58853
1538 ; P9BE-NEXT: mfvsrld r3, v3
1539 ; P9BE-NEXT: ori r4, r4, 6055
1540 ; P9BE-NEXT: mulhd r4, r3, r4
1541 ; P9BE-NEXT: rldicl r5, r4, 1, 63
1542 ; P9BE-NEXT: sradi r4, r4, 11
1543 ; P9BE-NEXT: add r4, r4, r5
1544 ; P9BE-NEXT: lis r5, -19946
1545 ; P9BE-NEXT: ori r5, r5, 17096
1546 ; P9BE-NEXT: mulli r4, r4, 5423
1547 ; P9BE-NEXT: sldi r5, r5, 32
1548 ; P9BE-NEXT: oris r5, r5, 22795
1549 ; P9BE-NEXT: sub r3, r3, r4
1550 ; P9BE-NEXT: mfvsrd r4, v3
1551 ; P9BE-NEXT: ori r5, r5, 8549
1552 ; P9BE-NEXT: mulhd r5, r4, r5
1553 ; P9BE-NEXT: add r5, r5, r4
1554 ; P9BE-NEXT: rldicl r6, r5, 1, 63
1555 ; P9BE-NEXT: sradi r5, r5, 4
1556 ; P9BE-NEXT: add r5, r5, r6
1557 ; P9BE-NEXT: mulli r5, r5, 23
1558 ; P9BE-NEXT: sub r4, r4, r5
1559 ; P9BE-NEXT: mtvsrdd v3, r4, r3
1560 ; P9BE-NEXT: lis r4, 25653
1561 ; P9BE-NEXT: ori r4, r4, 15432
1562 ; P9BE-NEXT: sldi r4, r4, 32
1563 ; P9BE-NEXT: oris r4, r4, 1603
1564 ; P9BE-NEXT: mfvsrld r3, v2
1565 ; P9BE-NEXT: ori r4, r4, 21445
1566 ; P9BE-NEXT: mulhd r4, r3, r4
1567 ; P9BE-NEXT: rldicl r5, r4, 1, 63
1568 ; P9BE-NEXT: sradi r4, r4, 8
1569 ; P9BE-NEXT: add r4, r4, r5
1570 ; P9BE-NEXT: mulli r4, r4, 654
1571 ; P9BE-NEXT: sub r3, r3, r4
1572 ; P9BE-NEXT: mtvsrdd v2, 0, r3
1575 ; P8LE-LABEL: dont_fold_srem_i64:
1577 ; P8LE-NEXT: lis r3, 24749
1578 ; P8LE-NEXT: lis r4, -19946
1579 ; P8LE-NEXT: lis r5, 25653
1580 ; P8LE-NEXT: xxswapd vs0, v3
1581 ; P8LE-NEXT: mfvsrd r6, v3
1582 ; P8LE-NEXT: ori r3, r3, 47142
1583 ; P8LE-NEXT: ori r4, r4, 17096
1584 ; P8LE-NEXT: ori r5, r5, 15432
1585 ; P8LE-NEXT: mfvsrd r7, v2
1586 ; P8LE-NEXT: sldi r3, r3, 32
1587 ; P8LE-NEXT: sldi r4, r4, 32
1588 ; P8LE-NEXT: sldi r5, r5, 32
1589 ; P8LE-NEXT: oris r3, r3, 58853
1590 ; P8LE-NEXT: oris r4, r4, 22795
1591 ; P8LE-NEXT: mfvsrd r8, f0
1592 ; P8LE-NEXT: oris r5, r5, 1603
1593 ; P8LE-NEXT: ori r3, r3, 6055
1594 ; P8LE-NEXT: ori r4, r4, 8549
1595 ; P8LE-NEXT: ori r5, r5, 21445
1596 ; P8LE-NEXT: mulhd r3, r6, r3
1597 ; P8LE-NEXT: mulhd r5, r7, r5
1598 ; P8LE-NEXT: mulhd r4, r8, r4
1599 ; P8LE-NEXT: rldicl r9, r3, 1, 63
1600 ; P8LE-NEXT: sradi r3, r3, 11
1601 ; P8LE-NEXT: add r3, r3, r9
1602 ; P8LE-NEXT: rldicl r9, r5, 1, 63
1603 ; P8LE-NEXT: add r4, r4, r8
1604 ; P8LE-NEXT: sradi r5, r5, 8
1605 ; P8LE-NEXT: mulli r3, r3, 5423
1606 ; P8LE-NEXT: add r5, r5, r9
1607 ; P8LE-NEXT: rldicl r9, r4, 1, 63
1608 ; P8LE-NEXT: sradi r4, r4, 4
1609 ; P8LE-NEXT: mulli r5, r5, 654
1610 ; P8LE-NEXT: add r4, r4, r9
1611 ; P8LE-NEXT: mulli r4, r4, 23
1612 ; P8LE-NEXT: sub r3, r6, r3
1613 ; P8LE-NEXT: mtvsrd f0, r3
1614 ; P8LE-NEXT: sub r5, r7, r5
1615 ; P8LE-NEXT: mtvsrd f1, r5
1616 ; P8LE-NEXT: sub r3, r8, r4
1617 ; P8LE-NEXT: li r4, 0
1618 ; P8LE-NEXT: mtvsrd f2, r3
1619 ; P8LE-NEXT: mtvsrd f3, r4
1620 ; P8LE-NEXT: xxmrghd v3, vs0, vs2
1621 ; P8LE-NEXT: xxmrghd v2, vs1, vs3
1624 ; P8BE-LABEL: dont_fold_srem_i64:
1626 ; P8BE-NEXT: lis r4, -19946
1627 ; P8BE-NEXT: lis r3, 24749
1628 ; P8BE-NEXT: xxswapd vs0, v3
1629 ; P8BE-NEXT: lis r5, 25653
1630 ; P8BE-NEXT: xxswapd vs1, v2
1631 ; P8BE-NEXT: ori r4, r4, 17096
1632 ; P8BE-NEXT: ori r3, r3, 47142
1633 ; P8BE-NEXT: ori r5, r5, 15432
1634 ; P8BE-NEXT: mfvsrd r6, v3
1635 ; P8BE-NEXT: sldi r4, r4, 32
1636 ; P8BE-NEXT: sldi r3, r3, 32
1637 ; P8BE-NEXT: oris r4, r4, 22795
1638 ; P8BE-NEXT: sldi r5, r5, 32
1639 ; P8BE-NEXT: oris r3, r3, 58853
1640 ; P8BE-NEXT: mfvsrd r7, f0
1641 ; P8BE-NEXT: ori r4, r4, 8549
1642 ; P8BE-NEXT: ori r3, r3, 6055
1643 ; P8BE-NEXT: oris r5, r5, 1603
1644 ; P8BE-NEXT: mfvsrd r8, f1
1645 ; P8BE-NEXT: mulhd r4, r6, r4
1646 ; P8BE-NEXT: mulhd r3, r7, r3
1647 ; P8BE-NEXT: ori r5, r5, 21445
1648 ; P8BE-NEXT: mulhd r5, r8, r5
1649 ; P8BE-NEXT: add r4, r4, r6
1650 ; P8BE-NEXT: rldicl r9, r3, 1, 63
1651 ; P8BE-NEXT: sradi r3, r3, 11
1652 ; P8BE-NEXT: rldicl r10, r4, 1, 63
1653 ; P8BE-NEXT: sradi r4, r4, 4
1654 ; P8BE-NEXT: add r3, r3, r9
1655 ; P8BE-NEXT: rldicl r9, r5, 1, 63
1656 ; P8BE-NEXT: add r4, r4, r10
1657 ; P8BE-NEXT: sradi r5, r5, 8
1658 ; P8BE-NEXT: mulli r3, r3, 5423
1659 ; P8BE-NEXT: add r5, r5, r9
1660 ; P8BE-NEXT: mulli r4, r4, 23
1661 ; P8BE-NEXT: mulli r5, r5, 654
1662 ; P8BE-NEXT: sub r3, r7, r3
1663 ; P8BE-NEXT: sub r4, r6, r4
1664 ; P8BE-NEXT: mtvsrd f0, r3
1665 ; P8BE-NEXT: sub r3, r8, r5
1666 ; P8BE-NEXT: mtvsrd f1, r4
1667 ; P8BE-NEXT: li r4, 0
1668 ; P8BE-NEXT: mtvsrd f2, r3
1669 ; P8BE-NEXT: mtvsrd f3, r4
1670 ; P8BE-NEXT: xxmrghd v3, vs1, vs0
1671 ; P8BE-NEXT: xxmrghd v2, vs3, vs2
1673 %1 = srem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>