1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
3 ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
4 ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
5 ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s \
6 ; RUN: -check-prefix=CHECK-P8
; qpFmadd: loads three fp128 values from %a, %b and %c, combines them with the
; llvm.fmuladd.f128 intrinsic (%a * %b + %c) and stores the result to %res.
; The pwr9 run expects one xsmaddqp whose accumulator/destination (v4) is the
; value loaded from %c. The pwr8 run expects library calls to __mulkf3 and then
; __addkf3, with the %c value kept in v31 across the first call.
8 define void @qpFmadd(ptr nocapture readonly %a, ptr nocapture %b,
9 ; CHECK-LABEL: qpFmadd:
10 ; CHECK: # %bb.0: # %entry
11 ; CHECK-NEXT: lxv v2, 0(r3)
12 ; CHECK-NEXT: lxv v3, 0(r4)
13 ; CHECK-NEXT: lxv v4, 0(r5)
14 ; CHECK-NEXT: xsmaddqp v4, v2, v3
15 ; CHECK-NEXT: stxv v4, 0(r6)
18 ; CHECK-P8-LABEL: qpFmadd:
19 ; CHECK-P8: # %bb.0: # %entry
20 ; CHECK-P8-NEXT: mflr r0
21 ; CHECK-P8-NEXT: stdu r1, -80(r1)
22 ; CHECK-P8-NEXT: std r0, 96(r1)
23 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 80
24 ; CHECK-P8-NEXT: .cfi_offset lr, 16
25 ; CHECK-P8-NEXT: .cfi_offset r30, -16
26 ; CHECK-P8-NEXT: .cfi_offset v31, -32
27 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
28 ; CHECK-P8-NEXT: li r7, 48
29 ; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill
30 ; CHECK-P8-NEXT: mr r30, r6
31 ; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill
32 ; CHECK-P8-NEXT: xxswapd v2, vs0
33 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
34 ; CHECK-P8-NEXT: xxswapd v3, vs0
35 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
36 ; CHECK-P8-NEXT: xxswapd v31, vs0
37 ; CHECK-P8-NEXT: bl __mulkf3
39 ; CHECK-P8-NEXT: vmr v3, v31
40 ; CHECK-P8-NEXT: bl __addkf3
42 ; CHECK-P8-NEXT: li r3, 48
43 ; CHECK-P8-NEXT: xxswapd vs0, v2
44 ; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
45 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
46 ; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
47 ; CHECK-P8-NEXT: addi r1, r1, 80
48 ; CHECK-P8-NEXT: ld r0, 16(r1)
49 ; CHECK-P8-NEXT: mtlr r0
51 ptr nocapture readonly %c, ptr nocapture %res) {
53 %0 = load fp128, ptr %a, align 16
54 %1 = load fp128, ptr %b, align 16
55 %2 = load fp128, ptr %c, align 16
56 %madd = tail call fp128 @llvm.fmuladd.f128(fp128 %0, fp128 %1, fp128 %2)
57 store fp128 %madd, ptr %res, align 16
; Declaration of the fp128 multiply-add intrinsic called by @qpFmadd; it takes
; three fp128 operands and returns an fp128 result.
60 declare fp128 @llvm.fmuladd.f128(fp128, fp128, fp128)
; qpFmadd_02: computes %a + (%b * %c) from separate fmul/fadd instructions that
; both carry the 'contract' flag, then stores the result to %res.
; The pwr9 run expects one xsmaddqp whose accumulator/destination (v2) is the
; value loaded from %a. The pwr8 run expects library calls to __mulkf3 and then
; __addkf3, with the %a value kept in v31 across the first call.
62 ; Function Attrs: norecurse nounwind
63 define void @qpFmadd_02(ptr nocapture readonly %a,
64 ; CHECK-LABEL: qpFmadd_02:
65 ; CHECK: # %bb.0: # %entry
66 ; CHECK-NEXT: lxv v2, 0(r3)
67 ; CHECK-NEXT: lxv v3, 0(r4)
68 ; CHECK-NEXT: lxv v4, 0(r5)
69 ; CHECK-NEXT: xsmaddqp v2, v3, v4
70 ; CHECK-NEXT: stxv v2, 0(r6)
73 ; CHECK-P8-LABEL: qpFmadd_02:
74 ; CHECK-P8: # %bb.0: # %entry
75 ; CHECK-P8-NEXT: mflr r0
76 ; CHECK-P8-NEXT: stdu r1, -80(r1)
77 ; CHECK-P8-NEXT: std r0, 96(r1)
78 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 80
79 ; CHECK-P8-NEXT: .cfi_offset lr, 16
80 ; CHECK-P8-NEXT: .cfi_offset r30, -16
81 ; CHECK-P8-NEXT: .cfi_offset v31, -32
82 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
83 ; CHECK-P8-NEXT: li r7, 48
84 ; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill
85 ; CHECK-P8-NEXT: mr r30, r6
86 ; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill
87 ; CHECK-P8-NEXT: xxswapd v31, vs0
88 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
89 ; CHECK-P8-NEXT: xxswapd v2, vs0
90 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
91 ; CHECK-P8-NEXT: xxswapd v3, vs0
92 ; CHECK-P8-NEXT: bl __mulkf3
94 ; CHECK-P8-NEXT: vmr v3, v2
95 ; CHECK-P8-NEXT: vmr v2, v31
96 ; CHECK-P8-NEXT: bl __addkf3
98 ; CHECK-P8-NEXT: li r3, 48
99 ; CHECK-P8-NEXT: xxswapd vs0, v2
100 ; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
101 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
102 ; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
103 ; CHECK-P8-NEXT: addi r1, r1, 80
104 ; CHECK-P8-NEXT: ld r0, 16(r1)
105 ; CHECK-P8-NEXT: mtlr r0
107 ptr nocapture readonly %b,
108 ptr nocapture readonly %c, ptr nocapture %res) {
110 %0 = load fp128, ptr %a, align 16
111 %1 = load fp128, ptr %b, align 16
112 %2 = load fp128, ptr %c, align 16
113 %mul = fmul contract fp128 %1, %2
114 %add = fadd contract fp128 %0, %mul
115 store fp128 %add, ptr %res, align 16
; qpFmadd_03: computes (%a * %b) + %c with 'contract' on both the fmul and the
; fadd; in the IR the load of %c is placed after the multiply. The result is
; stored to %res.
; The pwr9 run expects one xsmaddqp whose accumulator/destination (v4) is the
; value loaded from %c. The pwr8 run expects __mulkf3 then __addkf3, with the
; %c pointer saved in r29 and dereferenced only after the first call.
119 ; Function Attrs: norecurse nounwind
120 define void @qpFmadd_03(ptr nocapture readonly %a,
121 ; CHECK-LABEL: qpFmadd_03:
122 ; CHECK: # %bb.0: # %entry
123 ; CHECK-NEXT: lxv v2, 0(r3)
124 ; CHECK-NEXT: lxv v3, 0(r4)
125 ; CHECK-NEXT: lxv v4, 0(r5)
126 ; CHECK-NEXT: xsmaddqp v4, v2, v3
127 ; CHECK-NEXT: stxv v4, 0(r6)
130 ; CHECK-P8-LABEL: qpFmadd_03:
131 ; CHECK-P8: # %bb.0: # %entry
132 ; CHECK-P8-NEXT: mflr r0
133 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
134 ; CHECK-P8-NEXT: .cfi_offset lr, 16
135 ; CHECK-P8-NEXT: .cfi_offset r29, -24
136 ; CHECK-P8-NEXT: .cfi_offset r30, -16
137 ; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
138 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
139 ; CHECK-P8-NEXT: stdu r1, -64(r1)
140 ; CHECK-P8-NEXT: std r0, 80(r1)
141 ; CHECK-P8-NEXT: mr r30, r6
142 ; CHECK-P8-NEXT: mr r29, r5
143 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
144 ; CHECK-P8-NEXT: xxswapd v2, vs0
145 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
146 ; CHECK-P8-NEXT: xxswapd v3, vs0
147 ; CHECK-P8-NEXT: bl __mulkf3
149 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r29
150 ; CHECK-P8-NEXT: xxswapd v3, vs0
151 ; CHECK-P8-NEXT: bl __addkf3
153 ; CHECK-P8-NEXT: xxswapd vs0, v2
154 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
155 ; CHECK-P8-NEXT: addi r1, r1, 64
156 ; CHECK-P8-NEXT: ld r0, 16(r1)
157 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
158 ; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
159 ; CHECK-P8-NEXT: mtlr r0
161 ptr nocapture readonly %b,
162 ptr nocapture readonly %c, ptr nocapture %res) {
164 %0 = load fp128, ptr %a, align 16
165 %1 = load fp128, ptr %b, align 16
166 %mul = fmul contract fp128 %0, %1
167 %2 = load fp128, ptr %c, align 16
168 %add = fadd contract fp128 %mul, %2
169 store fp128 %add, ptr %res, align 16
; qpFnmadd: computes %a + (%b * %c) with 'contract' on the fmul and fadd, then
; subtracts the sum from the fp128 constant whose only set bit is the sign bit
; (negative zero), i.e. negates it, and stores the result to %res.
; The pwr9 run expects one xsnmaddqp whose accumulator/destination (v2) is the
; value loaded from %a. The pwr8 run expects __mulkf3 then __addkf3, followed
; by a sign flip done through the stack: the result is stored at 48(r1), the
; byte at 63(r1) is XORed with 128, and the value is reloaded before the final
; store.
173 ; Function Attrs: norecurse nounwind
174 define void @qpFnmadd(ptr nocapture readonly %a,
175 ; CHECK-LABEL: qpFnmadd:
176 ; CHECK: # %bb.0: # %entry
177 ; CHECK-NEXT: lxv v2, 0(r3)
178 ; CHECK-NEXT: lxv v3, 0(r4)
179 ; CHECK-NEXT: lxv v4, 0(r5)
180 ; CHECK-NEXT: xsnmaddqp v2, v3, v4
181 ; CHECK-NEXT: stxv v2, 0(r6)
184 ; CHECK-P8-LABEL: qpFnmadd:
185 ; CHECK-P8: # %bb.0: # %entry
186 ; CHECK-P8-NEXT: mflr r0
187 ; CHECK-P8-NEXT: stdu r1, -96(r1)
188 ; CHECK-P8-NEXT: std r0, 112(r1)
189 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 96
190 ; CHECK-P8-NEXT: .cfi_offset lr, 16
191 ; CHECK-P8-NEXT: .cfi_offset r30, -16
192 ; CHECK-P8-NEXT: .cfi_offset v31, -32
193 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
194 ; CHECK-P8-NEXT: li r7, 64
195 ; CHECK-P8-NEXT: std r30, 80(r1) # 8-byte Folded Spill
196 ; CHECK-P8-NEXT: mr r30, r6
197 ; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill
198 ; CHECK-P8-NEXT: xxswapd v31, vs0
199 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
200 ; CHECK-P8-NEXT: xxswapd v2, vs0
201 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
202 ; CHECK-P8-NEXT: xxswapd v3, vs0
203 ; CHECK-P8-NEXT: bl __mulkf3
205 ; CHECK-P8-NEXT: vmr v3, v2
206 ; CHECK-P8-NEXT: vmr v2, v31
207 ; CHECK-P8-NEXT: bl __addkf3
209 ; CHECK-P8-NEXT: xxswapd vs0, v2
210 ; CHECK-P8-NEXT: addi r3, r1, 48
211 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
212 ; CHECK-P8-NEXT: lbz r4, 63(r1)
213 ; CHECK-P8-NEXT: xori r4, r4, 128
214 ; CHECK-P8-NEXT: stb r4, 63(r1)
215 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
216 ; CHECK-P8-NEXT: li r3, 64
217 ; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
218 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
219 ; CHECK-P8-NEXT: ld r30, 80(r1) # 8-byte Folded Reload
220 ; CHECK-P8-NEXT: addi r1, r1, 96
221 ; CHECK-P8-NEXT: ld r0, 16(r1)
222 ; CHECK-P8-NEXT: mtlr r0
224 ptr nocapture readonly %b,
225 ptr nocapture readonly %c, ptr nocapture %res) {
227 %0 = load fp128, ptr %a, align 16
228 %1 = load fp128, ptr %b, align 16
229 %2 = load fp128, ptr %c, align 16
230 %mul = fmul contract fp128 %1, %2
231 %add = fadd contract fp128 %0, %mul
232 %sub = fsub fp128 0xL00000000000000008000000000000000, %add
233 store fp128 %sub, ptr %res, align 16
; qpFnmadd_02: computes (%a * %b) + %c with 'contract' on the fmul and fadd
; (the load of %c is placed after the multiply), then subtracts the sum from
; the fp128 constant whose only set bit is the sign bit (negative zero), i.e.
; negates it, and stores the result to %res.
; The pwr9 run expects one xsnmaddqp whose accumulator/destination (v4) is the
; value loaded from %c. The pwr8 run expects __mulkf3 then __addkf3 (with the
; %c pointer saved in r29), followed by a sign flip done through the stack:
; the result is stored at 32(r1), the byte at 47(r1) is XORed with 128, and the
; value is reloaded before the final store.
237 ; Function Attrs: norecurse nounwind
238 define void @qpFnmadd_02(ptr nocapture readonly %a,
239 ; CHECK-LABEL: qpFnmadd_02:
240 ; CHECK: # %bb.0: # %entry
241 ; CHECK-NEXT: lxv v2, 0(r3)
242 ; CHECK-NEXT: lxv v3, 0(r4)
243 ; CHECK-NEXT: lxv v4, 0(r5)
244 ; CHECK-NEXT: xsnmaddqp v4, v2, v3
245 ; CHECK-NEXT: stxv v4, 0(r6)
248 ; CHECK-P8-LABEL: qpFnmadd_02:
249 ; CHECK-P8: # %bb.0: # %entry
250 ; CHECK-P8-NEXT: mflr r0
251 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 80
252 ; CHECK-P8-NEXT: .cfi_offset lr, 16
253 ; CHECK-P8-NEXT: .cfi_offset r29, -24
254 ; CHECK-P8-NEXT: .cfi_offset r30, -16
255 ; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
256 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
257 ; CHECK-P8-NEXT: stdu r1, -80(r1)
258 ; CHECK-P8-NEXT: std r0, 96(r1)
259 ; CHECK-P8-NEXT: mr r30, r6
260 ; CHECK-P8-NEXT: mr r29, r5
261 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
262 ; CHECK-P8-NEXT: xxswapd v2, vs0
263 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
264 ; CHECK-P8-NEXT: xxswapd v3, vs0
265 ; CHECK-P8-NEXT: bl __mulkf3
267 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r29
268 ; CHECK-P8-NEXT: xxswapd v3, vs0
269 ; CHECK-P8-NEXT: bl __addkf3
271 ; CHECK-P8-NEXT: xxswapd vs0, v2
272 ; CHECK-P8-NEXT: addi r3, r1, 32
273 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
274 ; CHECK-P8-NEXT: lbz r4, 47(r1)
275 ; CHECK-P8-NEXT: xori r4, r4, 128
276 ; CHECK-P8-NEXT: stb r4, 47(r1)
277 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
278 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
279 ; CHECK-P8-NEXT: addi r1, r1, 80
280 ; CHECK-P8-NEXT: ld r0, 16(r1)
281 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
282 ; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
283 ; CHECK-P8-NEXT: mtlr r0
285 ptr nocapture readonly %b,
286 ptr nocapture readonly %c, ptr nocapture %res) {
288 %0 = load fp128, ptr %a, align 16
289 %1 = load fp128, ptr %b, align 16
290 %mul = fmul contract fp128 %0, %1
291 %2 = load fp128, ptr %c, align 16
292 %add = fadd contract fp128 %mul, %2
293 %sub = fsub fp128 0xL00000000000000008000000000000000, %add
294 store fp128 %sub, ptr %res, align 16
; qpFmsub: computes %a - (%b * %c); the fmul carries 'contract' and the fsub
; carries both 'contract' and 'nsz'. The result is stored to %res.
; The pwr9 run expects one xsnmsubqp whose accumulator/destination (v2) is the
; value loaded from %a. The pwr8 run expects library calls to __mulkf3 and then
; __subkf3, with the %a value kept in v31 across the first call.
298 ; Function Attrs: norecurse nounwind
299 define void @qpFmsub(ptr nocapture readonly %a,
300 ; CHECK-LABEL: qpFmsub:
301 ; CHECK: # %bb.0: # %entry
302 ; CHECK-NEXT: lxv v2, 0(r3)
303 ; CHECK-NEXT: lxv v3, 0(r4)
304 ; CHECK-NEXT: lxv v4, 0(r5)
305 ; CHECK-NEXT: xsnmsubqp v2, v3, v4
306 ; CHECK-NEXT: stxv v2, 0(r6)
309 ; CHECK-P8-LABEL: qpFmsub:
310 ; CHECK-P8: # %bb.0: # %entry
311 ; CHECK-P8-NEXT: mflr r0
312 ; CHECK-P8-NEXT: stdu r1, -80(r1)
313 ; CHECK-P8-NEXT: std r0, 96(r1)
314 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 80
315 ; CHECK-P8-NEXT: .cfi_offset lr, 16
316 ; CHECK-P8-NEXT: .cfi_offset r30, -16
317 ; CHECK-P8-NEXT: .cfi_offset v31, -32
318 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
319 ; CHECK-P8-NEXT: li r7, 48
320 ; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill
321 ; CHECK-P8-NEXT: mr r30, r6
322 ; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill
323 ; CHECK-P8-NEXT: xxswapd v31, vs0
324 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
325 ; CHECK-P8-NEXT: xxswapd v2, vs0
326 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
327 ; CHECK-P8-NEXT: xxswapd v3, vs0
328 ; CHECK-P8-NEXT: bl __mulkf3
330 ; CHECK-P8-NEXT: vmr v3, v2
331 ; CHECK-P8-NEXT: vmr v2, v31
332 ; CHECK-P8-NEXT: bl __subkf3
334 ; CHECK-P8-NEXT: li r3, 48
335 ; CHECK-P8-NEXT: xxswapd vs0, v2
336 ; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
337 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
338 ; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
339 ; CHECK-P8-NEXT: addi r1, r1, 80
340 ; CHECK-P8-NEXT: ld r0, 16(r1)
341 ; CHECK-P8-NEXT: mtlr r0
343 ptr nocapture readonly %b,
344 ptr nocapture readonly %c, ptr nocapture %res) {
346 %0 = load fp128, ptr %a, align 16
347 %1 = load fp128, ptr %b, align 16
348 %2 = load fp128, ptr %c, align 16
349 %mul = fmul contract fp128 %1, %2
350 %sub = fsub contract nsz fp128 %0, %mul
351 store fp128 %sub, ptr %res, align 16
; qpFmsub_02: computes (%a * %b) - %c with 'contract' on both the fmul and the
; fsub; in the IR the load of %c is placed after the multiply. The result is
; stored to %res.
; The pwr9 run expects one xsmsubqp whose accumulator/destination (v4) is the
; value loaded from %c. The pwr8 run expects __mulkf3 then __subkf3, with the
; %c pointer saved in r29 and dereferenced only after the first call.
355 ; Function Attrs: norecurse nounwind
356 define void @qpFmsub_02(ptr nocapture readonly %a,
357 ; CHECK-LABEL: qpFmsub_02:
358 ; CHECK: # %bb.0: # %entry
359 ; CHECK-NEXT: lxv v2, 0(r3)
360 ; CHECK-NEXT: lxv v3, 0(r4)
361 ; CHECK-NEXT: lxv v4, 0(r5)
362 ; CHECK-NEXT: xsmsubqp v4, v2, v3
363 ; CHECK-NEXT: stxv v4, 0(r6)
366 ; CHECK-P8-LABEL: qpFmsub_02:
367 ; CHECK-P8: # %bb.0: # %entry
368 ; CHECK-P8-NEXT: mflr r0
369 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
370 ; CHECK-P8-NEXT: .cfi_offset lr, 16
371 ; CHECK-P8-NEXT: .cfi_offset r29, -24
372 ; CHECK-P8-NEXT: .cfi_offset r30, -16
373 ; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
374 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
375 ; CHECK-P8-NEXT: stdu r1, -64(r1)
376 ; CHECK-P8-NEXT: std r0, 80(r1)
377 ; CHECK-P8-NEXT: mr r30, r6
378 ; CHECK-P8-NEXT: mr r29, r5
379 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
380 ; CHECK-P8-NEXT: xxswapd v2, vs0
381 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
382 ; CHECK-P8-NEXT: xxswapd v3, vs0
383 ; CHECK-P8-NEXT: bl __mulkf3
385 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r29
386 ; CHECK-P8-NEXT: xxswapd v3, vs0
387 ; CHECK-P8-NEXT: bl __subkf3
389 ; CHECK-P8-NEXT: xxswapd vs0, v2
390 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
391 ; CHECK-P8-NEXT: addi r1, r1, 64
392 ; CHECK-P8-NEXT: ld r0, 16(r1)
393 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
394 ; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
395 ; CHECK-P8-NEXT: mtlr r0
397 ptr nocapture readonly %b,
398 ptr nocapture readonly %c, ptr nocapture %res) {
400 %0 = load fp128, ptr %a, align 16
401 %1 = load fp128, ptr %b, align 16
402 %mul = fmul contract fp128 %0, %1
403 %2 = load fp128, ptr %c, align 16
404 %sub = fsub contract fp128 %mul, %2
405 store fp128 %sub, ptr %res, align 16
; qpFnmsub: computes %a - (%b * %c) with 'contract' (and no 'nsz') on the fmul
; and fsub, then subtracts that difference from the fp128 constant whose only
; set bit is the sign bit (negative zero), i.e. negates it, and stores the
; result to %res.
; The pwr9 run expects the value loaded from %b to be negated with xsnegqp and
; then fed to xsnmaddqp, whose accumulator/destination (v2) is the value loaded
; from %a. The pwr8 run expects __mulkf3 then __subkf3, followed by a sign flip
; done through the stack: the result is stored at 48(r1), the byte at 63(r1) is
; XORed with 128, and the value is reloaded before the final store.
409 ; Function Attrs: norecurse nounwind
410 define void @qpFnmsub(ptr nocapture readonly %a,
411 ; CHECK-LABEL: qpFnmsub:
412 ; CHECK: # %bb.0: # %entry
413 ; CHECK-NEXT: lxv v3, 0(r4)
414 ; CHECK-NEXT: lxv v2, 0(r3)
415 ; CHECK-NEXT: lxv v4, 0(r5)
416 ; CHECK-NEXT: xsnegqp v3, v3
417 ; CHECK-NEXT: xsnmaddqp v2, v3, v4
418 ; CHECK-NEXT: stxv v2, 0(r6)
421 ; CHECK-P8-LABEL: qpFnmsub:
422 ; CHECK-P8: # %bb.0: # %entry
423 ; CHECK-P8-NEXT: mflr r0
424 ; CHECK-P8-NEXT: stdu r1, -96(r1)
425 ; CHECK-P8-NEXT: std r0, 112(r1)
426 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 96
427 ; CHECK-P8-NEXT: .cfi_offset lr, 16
428 ; CHECK-P8-NEXT: .cfi_offset r30, -16
429 ; CHECK-P8-NEXT: .cfi_offset v31, -32
430 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
431 ; CHECK-P8-NEXT: li r7, 64
432 ; CHECK-P8-NEXT: std r30, 80(r1) # 8-byte Folded Spill
433 ; CHECK-P8-NEXT: mr r30, r6
434 ; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill
435 ; CHECK-P8-NEXT: xxswapd v31, vs0
436 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
437 ; CHECK-P8-NEXT: xxswapd v2, vs0
438 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
439 ; CHECK-P8-NEXT: xxswapd v3, vs0
440 ; CHECK-P8-NEXT: bl __mulkf3
442 ; CHECK-P8-NEXT: vmr v3, v2
443 ; CHECK-P8-NEXT: vmr v2, v31
444 ; CHECK-P8-NEXT: bl __subkf3
446 ; CHECK-P8-NEXT: xxswapd vs0, v2
447 ; CHECK-P8-NEXT: addi r3, r1, 48
448 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
449 ; CHECK-P8-NEXT: lbz r4, 63(r1)
450 ; CHECK-P8-NEXT: xori r4, r4, 128
451 ; CHECK-P8-NEXT: stb r4, 63(r1)
452 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
453 ; CHECK-P8-NEXT: li r3, 64
454 ; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
455 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
456 ; CHECK-P8-NEXT: ld r30, 80(r1) # 8-byte Folded Reload
457 ; CHECK-P8-NEXT: addi r1, r1, 96
458 ; CHECK-P8-NEXT: ld r0, 16(r1)
459 ; CHECK-P8-NEXT: mtlr r0
461 ptr nocapture readonly %b,
462 ptr nocapture readonly %c, ptr nocapture %res) {
464 %0 = load fp128, ptr %a, align 16
465 %1 = load fp128, ptr %b, align 16
466 %2 = load fp128, ptr %c, align 16
467 %mul = fmul contract fp128 %1, %2
468 %sub = fsub contract fp128 %0, %mul
469 %sub1 = fsub fp128 0xL00000000000000008000000000000000, %sub
470 store fp128 %sub1, ptr %res, align 16
; qpFnmsub_02: computes (%a * %b) - %c with 'contract' on the fmul and fsub
; (the load of %c is placed after the multiply), then subtracts that difference
; from the fp128 constant whose only set bit is the sign bit (negative zero),
; i.e. negates it, and stores the result to %res.
; The pwr9 run expects one xsnmsubqp whose accumulator/destination (v4) is the
; value loaded from %c. The pwr8 run expects __mulkf3 then __subkf3 (with the
; %c pointer saved in r29), followed by a sign flip done through the stack:
; the result is stored at 32(r1), the byte at 47(r1) is XORed with 128, and the
; value is reloaded before the final store.
474 ; Function Attrs: norecurse nounwind
475 define void @qpFnmsub_02(ptr nocapture readonly %a,
476 ; CHECK-LABEL: qpFnmsub_02:
477 ; CHECK: # %bb.0: # %entry
478 ; CHECK-NEXT: lxv v2, 0(r3)
479 ; CHECK-NEXT: lxv v3, 0(r4)
480 ; CHECK-NEXT: lxv v4, 0(r5)
481 ; CHECK-NEXT: xsnmsubqp v4, v2, v3
482 ; CHECK-NEXT: stxv v4, 0(r6)
485 ; CHECK-P8-LABEL: qpFnmsub_02:
486 ; CHECK-P8: # %bb.0: # %entry
487 ; CHECK-P8-NEXT: mflr r0
488 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 80
489 ; CHECK-P8-NEXT: .cfi_offset lr, 16
490 ; CHECK-P8-NEXT: .cfi_offset r29, -24
491 ; CHECK-P8-NEXT: .cfi_offset r30, -16
492 ; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
493 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
494 ; CHECK-P8-NEXT: stdu r1, -80(r1)
495 ; CHECK-P8-NEXT: std r0, 96(r1)
496 ; CHECK-P8-NEXT: mr r30, r6
497 ; CHECK-P8-NEXT: mr r29, r5
498 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
499 ; CHECK-P8-NEXT: xxswapd v2, vs0
500 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
501 ; CHECK-P8-NEXT: xxswapd v3, vs0
502 ; CHECK-P8-NEXT: bl __mulkf3
504 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r29
505 ; CHECK-P8-NEXT: xxswapd v3, vs0
506 ; CHECK-P8-NEXT: bl __subkf3
508 ; CHECK-P8-NEXT: xxswapd vs0, v2
509 ; CHECK-P8-NEXT: addi r3, r1, 32
510 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
511 ; CHECK-P8-NEXT: lbz r4, 47(r1)
512 ; CHECK-P8-NEXT: xori r4, r4, 128
513 ; CHECK-P8-NEXT: stb r4, 47(r1)
514 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
515 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
516 ; CHECK-P8-NEXT: addi r1, r1, 80
517 ; CHECK-P8-NEXT: ld r0, 16(r1)
518 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
519 ; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
520 ; CHECK-P8-NEXT: mtlr r0
522 ptr nocapture readonly %b,
523 ptr nocapture readonly %c, ptr nocapture %res) {
525 %0 = load fp128, ptr %a, align 16
526 %1 = load fp128, ptr %b, align 16
527 %mul = fmul contract fp128 %0, %1
528 %2 = load fp128, ptr %c, align 16
529 %sub = fsub contract fp128 %mul, %2
530 %sub1 = fsub fp128 0xL00000000000000008000000000000000, %sub
531 store fp128 %sub1, ptr %res, align 16