1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
3 ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
4 ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
5 ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s \
6 ; RUN: -check-prefix=CHECK-P8
; qpFmadd: *res = fmuladd(a, b, c) via the llvm.fmuladd.f128 intrinsic.
; P9 (CHECK): a single xsmaddqp, with c pre-loaded into the accumulator (v4).
; P8 (CHECK-P8): no native fp128 arithmetic, so the multiply/add become
; __mulkf3/__addkf3 libcalls; callee-saved v31 keeps c live across __mulkf3.
; NOTE(review): trailing lines of this block (blr / entry: / ret void / })
; appear to have been dropped in extraction — listing numbers skip.
8 define void @qpFmadd(fp128* nocapture readonly %a, fp128* nocapture %b,
9 ; CHECK-LABEL: qpFmadd:
10 ; CHECK: # %bb.0: # %entry
11 ; CHECK-NEXT: lxv v2, 0(r3)
12 ; CHECK-NEXT: lxv v3, 0(r4)
13 ; CHECK-NEXT: lxv v4, 0(r5)
14 ; CHECK-NEXT: xsmaddqp v4, v2, v3
15 ; CHECK-NEXT: stxv v4, 0(r6)
18 ; CHECK-P8-LABEL: qpFmadd:
19 ; CHECK-P8: # %bb.0: # %entry
20 ; CHECK-P8-NEXT: mflr r0
21 ; CHECK-P8-NEXT: std r0, 16(r1)
22 ; CHECK-P8-NEXT: stdu r1, -80(r1)
23 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 80
24 ; CHECK-P8-NEXT: .cfi_offset lr, 16
25 ; CHECK-P8-NEXT: .cfi_offset r30, -16
26 ; CHECK-P8-NEXT: .cfi_offset v31, -32
27 ; CHECK-P8-NEXT: li r7, 48
28 ; CHECK-P8-NEXT: lvx v2, 0, r3
29 ; CHECK-P8-NEXT: lvx v3, 0, r4
30 ; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill
31 ; CHECK-P8-NEXT: mr r30, r6
32 ; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill
33 ; CHECK-P8-NEXT: lvx v31, 0, r5
34 ; CHECK-P8-NEXT: bl __mulkf3
36 ; CHECK-P8-NEXT: vmr v3, v31
37 ; CHECK-P8-NEXT: bl __addkf3
39 ; CHECK-P8-NEXT: li r3, 48
40 ; CHECK-P8-NEXT: stvx v2, 0, r30
41 ; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
42 ; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
43 ; CHECK-P8-NEXT: addi r1, r1, 80
44 ; CHECK-P8-NEXT: ld r0, 16(r1)
45 ; CHECK-P8-NEXT: mtlr r0
; IR body: load all three operands, fuse via the intrinsic, store.
47 fp128* nocapture readonly %c, fp128* nocapture %res) {
49 %0 = load fp128, fp128* %a, align 16
50 %1 = load fp128, fp128* %b, align 16
51 %2 = load fp128, fp128* %c, align 16
52 %madd = tail call fp128 @llvm.fmuladd.f128(fp128 %0, fp128 %1, fp128 %2)
53 store fp128 %madd, fp128* %res, align 16
; Declaration of the fused multiply-add intrinsic exercised by qpFmadd.
56 declare fp128 @llvm.fmuladd.f128(fp128, fp128, fp128)
; qpFmadd_02: *res = a + (b * c), formed from separate fmul/fadd that both
; carry the 'contract' fast-math flag, so they may be fused.
; P9 (CHECK): fused into xsmaddqp v2, v3, v4 (addend a is the accumulator).
; P8 (CHECK-P8): __mulkf3 then __addkf3; a is parked in v31 across the
; multiply libcall, then vmr-shuffled into the __addkf3 argument registers.
58 ; Function Attrs: norecurse nounwind
59 define void @qpFmadd_02(fp128* nocapture readonly %a,
60 ; CHECK-LABEL: qpFmadd_02:
61 ; CHECK: # %bb.0: # %entry
62 ; CHECK-NEXT: lxv v2, 0(r3)
63 ; CHECK-NEXT: lxv v3, 0(r4)
64 ; CHECK-NEXT: lxv v4, 0(r5)
65 ; CHECK-NEXT: xsmaddqp v2, v3, v4
66 ; CHECK-NEXT: stxv v2, 0(r6)
69 ; CHECK-P8-LABEL: qpFmadd_02:
70 ; CHECK-P8: # %bb.0: # %entry
71 ; CHECK-P8-NEXT: mflr r0
72 ; CHECK-P8-NEXT: std r0, 16(r1)
73 ; CHECK-P8-NEXT: stdu r1, -80(r1)
74 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 80
75 ; CHECK-P8-NEXT: .cfi_offset lr, 16
76 ; CHECK-P8-NEXT: .cfi_offset r30, -16
77 ; CHECK-P8-NEXT: .cfi_offset v31, -32
78 ; CHECK-P8-NEXT: li r7, 48
79 ; CHECK-P8-NEXT: lvx v2, 0, r4
80 ; CHECK-P8-NEXT: lvx v3, 0, r5
81 ; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill
82 ; CHECK-P8-NEXT: mr r30, r6
83 ; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill
84 ; CHECK-P8-NEXT: lvx v31, 0, r3
85 ; CHECK-P8-NEXT: bl __mulkf3
87 ; CHECK-P8-NEXT: vmr v3, v2
88 ; CHECK-P8-NEXT: vmr v2, v31
89 ; CHECK-P8-NEXT: bl __addkf3
91 ; CHECK-P8-NEXT: li r3, 48
92 ; CHECK-P8-NEXT: stvx v2, 0, r30
93 ; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
94 ; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
95 ; CHECK-P8-NEXT: addi r1, r1, 80
96 ; CHECK-P8-NEXT: ld r0, 16(r1)
97 ; CHECK-P8-NEXT: mtlr r0
; IR body: mul = b*c, add = a + mul (both 'contract' => fusable).
99 fp128* nocapture readonly %b,
100 fp128* nocapture readonly %c, fp128* nocapture %res) {
102 %0 = load fp128, fp128* %a, align 16
103 %1 = load fp128, fp128* %b, align 16
104 %2 = load fp128, fp128* %c, align 16
105 %mul = fmul contract fp128 %1, %2
106 %add = fadd contract fp128 %0, %mul
107 store fp128 %add, fp128* %res, align 16
; qpFmadd_03: *res = (a * b) + c — same contraction as qpFmadd_02 but with
; the multiply first and the addend loaded after it.
; P9 (CHECK): xsmaddqp v4, v2, v3 (addend c in the accumulator).
; P8 (CHECK-P8): here the addend is re-loaded after __mulkf3 (its pointer is
; kept in r29), so no vector callee-saved spill of the operand is needed.
111 ; Function Attrs: norecurse nounwind
112 define void @qpFmadd_03(fp128* nocapture readonly %a,
113 ; CHECK-LABEL: qpFmadd_03:
114 ; CHECK: # %bb.0: # %entry
115 ; CHECK-NEXT: lxv v2, 0(r3)
116 ; CHECK-NEXT: lxv v3, 0(r4)
117 ; CHECK-NEXT: lxv v4, 0(r5)
118 ; CHECK-NEXT: xsmaddqp v4, v2, v3
119 ; CHECK-NEXT: stxv v4, 0(r6)
122 ; CHECK-P8-LABEL: qpFmadd_03:
123 ; CHECK-P8: # %bb.0: # %entry
124 ; CHECK-P8-NEXT: mflr r0
125 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
126 ; CHECK-P8-NEXT: .cfi_offset lr, 16
127 ; CHECK-P8-NEXT: .cfi_offset r29, -24
128 ; CHECK-P8-NEXT: .cfi_offset r30, -16
129 ; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
130 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
131 ; CHECK-P8-NEXT: std r0, 16(r1)
132 ; CHECK-P8-NEXT: stdu r1, -64(r1)
133 ; CHECK-P8-NEXT: lvx v2, 0, r3
134 ; CHECK-P8-NEXT: lvx v3, 0, r4
135 ; CHECK-P8-NEXT: mr r30, r6
136 ; CHECK-P8-NEXT: mr r29, r5
137 ; CHECK-P8-NEXT: bl __mulkf3
139 ; CHECK-P8-NEXT: lvx v3, 0, r29
140 ; CHECK-P8-NEXT: bl __addkf3
142 ; CHECK-P8-NEXT: stvx v2, 0, r30
143 ; CHECK-P8-NEXT: addi r1, r1, 64
144 ; CHECK-P8-NEXT: ld r0, 16(r1)
145 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
146 ; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
147 ; CHECK-P8-NEXT: mtlr r0
; IR body: mul = a*b, then load c, add = mul + c (both 'contract').
149 fp128* nocapture readonly %b,
150 fp128* nocapture readonly %c, fp128* nocapture %res) {
152 %0 = load fp128, fp128* %a, align 16
153 %1 = load fp128, fp128* %b, align 16
154 %mul = fmul contract fp128 %0, %1
155 %2 = load fp128, fp128* %c, align 16
156 %add = fadd contract fp128 %mul, %2
157 store fp128 %add, fp128* %res, align 16
; qpFnmadd: *res = -(a + b*c), i.e. a negated FMA (negation written as
; fsub -0.0, %add in the IR).
; P9 (CHECK): single negative-multiply-add xsnmaddqp.
; P8 (CHECK-P8): __mulkf3/__addkf3, then the negation is done in memory by
; flipping the sign bit: store the result to the stack, lbz/xori 128/stb on
; byte 63(r1) — the top byte of the little-endian fp128 slot — and reload.
161 ; Function Attrs: norecurse nounwind
162 define void @qpFnmadd(fp128* nocapture readonly %a,
163 ; CHECK-LABEL: qpFnmadd:
164 ; CHECK: # %bb.0: # %entry
165 ; CHECK-NEXT: lxv v2, 0(r3)
166 ; CHECK-NEXT: lxv v3, 0(r4)
167 ; CHECK-NEXT: lxv v4, 0(r5)
168 ; CHECK-NEXT: xsnmaddqp v2, v3, v4
169 ; CHECK-NEXT: stxv v2, 0(r6)
172 ; CHECK-P8-LABEL: qpFnmadd:
173 ; CHECK-P8: # %bb.0: # %entry
174 ; CHECK-P8-NEXT: mflr r0
175 ; CHECK-P8-NEXT: std r0, 16(r1)
176 ; CHECK-P8-NEXT: stdu r1, -96(r1)
177 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 96
178 ; CHECK-P8-NEXT: .cfi_offset lr, 16
179 ; CHECK-P8-NEXT: .cfi_offset r30, -16
180 ; CHECK-P8-NEXT: .cfi_offset v31, -32
181 ; CHECK-P8-NEXT: li r7, 64
182 ; CHECK-P8-NEXT: lvx v2, 0, r4
183 ; CHECK-P8-NEXT: lvx v3, 0, r5
184 ; CHECK-P8-NEXT: std r30, 80(r1) # 8-byte Folded Spill
185 ; CHECK-P8-NEXT: mr r30, r6
186 ; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill
187 ; CHECK-P8-NEXT: lvx v31, 0, r3
188 ; CHECK-P8-NEXT: bl __mulkf3
190 ; CHECK-P8-NEXT: vmr v3, v2
191 ; CHECK-P8-NEXT: vmr v2, v31
192 ; CHECK-P8-NEXT: bl __addkf3
194 ; CHECK-P8-NEXT: addi r3, r1, 48
195 ; CHECK-P8-NEXT: stvx v2, 0, r3
196 ; CHECK-P8-NEXT: lbz r4, 63(r1)
197 ; CHECK-P8-NEXT: xori r4, r4, 128
198 ; CHECK-P8-NEXT: stb r4, 63(r1)
199 ; CHECK-P8-NEXT: lvx v2, 0, r3
200 ; CHECK-P8-NEXT: li r3, 64
201 ; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
202 ; CHECK-P8-NEXT: stvx v2, 0, r30
203 ; CHECK-P8-NEXT: ld r30, 80(r1) # 8-byte Folded Reload
204 ; CHECK-P8-NEXT: addi r1, r1, 96
205 ; CHECK-P8-NEXT: ld r0, 16(r1)
206 ; CHECK-P8-NEXT: mtlr r0
; IR body: contracted a + b*c, then an unflagged negate (fsub from -0.0).
208 fp128* nocapture readonly %b,
209 fp128* nocapture readonly %c, fp128* nocapture %res) {
211 %0 = load fp128, fp128* %a, align 16
212 %1 = load fp128, fp128* %b, align 16
213 %2 = load fp128, fp128* %c, align 16
214 %mul = fmul contract fp128 %1, %2
215 %add = fadd contract fp128 %0, %mul
216 %sub = fsub fp128 0xL00000000000000008000000000000000, %add
217 store fp128 %sub, fp128* %res, align 16
; qpFnmadd_02: *res = -((a*b) + c) — multiply-first variant of qpFnmadd.
; P9 (CHECK): xsnmaddqp v4, v2, v3 (addend c in the accumulator).
; P8 (CHECK-P8): libcalls with c re-loaded via r29, then the same in-memory
; sign-bit flip (lbz/xori 128/stb on the high byte of the stored fp128).
221 ; Function Attrs: norecurse nounwind
222 define void @qpFnmadd_02(fp128* nocapture readonly %a,
223 ; CHECK-LABEL: qpFnmadd_02:
224 ; CHECK: # %bb.0: # %entry
225 ; CHECK-NEXT: lxv v2, 0(r3)
226 ; CHECK-NEXT: lxv v3, 0(r4)
227 ; CHECK-NEXT: lxv v4, 0(r5)
228 ; CHECK-NEXT: xsnmaddqp v4, v2, v3
229 ; CHECK-NEXT: stxv v4, 0(r6)
232 ; CHECK-P8-LABEL: qpFnmadd_02:
233 ; CHECK-P8: # %bb.0: # %entry
234 ; CHECK-P8-NEXT: mflr r0
235 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 80
236 ; CHECK-P8-NEXT: .cfi_offset lr, 16
237 ; CHECK-P8-NEXT: .cfi_offset r29, -24
238 ; CHECK-P8-NEXT: .cfi_offset r30, -16
239 ; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
240 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
241 ; CHECK-P8-NEXT: std r0, 16(r1)
242 ; CHECK-P8-NEXT: stdu r1, -80(r1)
243 ; CHECK-P8-NEXT: lvx v2, 0, r3
244 ; CHECK-P8-NEXT: lvx v3, 0, r4
245 ; CHECK-P8-NEXT: mr r30, r6
246 ; CHECK-P8-NEXT: mr r29, r5
247 ; CHECK-P8-NEXT: bl __mulkf3
249 ; CHECK-P8-NEXT: lvx v3, 0, r29
250 ; CHECK-P8-NEXT: bl __addkf3
252 ; CHECK-P8-NEXT: addi r3, r1, 32
253 ; CHECK-P8-NEXT: stvx v2, 0, r3
254 ; CHECK-P8-NEXT: lbz r4, 47(r1)
255 ; CHECK-P8-NEXT: xori r4, r4, 128
256 ; CHECK-P8-NEXT: stb r4, 47(r1)
257 ; CHECK-P8-NEXT: lvx v2, 0, r3
258 ; CHECK-P8-NEXT: stvx v2, 0, r30
259 ; CHECK-P8-NEXT: addi r1, r1, 80
260 ; CHECK-P8-NEXT: ld r0, 16(r1)
261 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
262 ; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
263 ; CHECK-P8-NEXT: mtlr r0
; IR body: contracted a*b + c, then an unflagged negate.
265 fp128* nocapture readonly %b,
266 fp128* nocapture readonly %c, fp128* nocapture %res) {
268 %0 = load fp128, fp128* %a, align 16
269 %1 = load fp128, fp128* %b, align 16
270 %mul = fmul contract fp128 %0, %1
271 %2 = load fp128, fp128* %c, align 16
272 %add = fadd contract fp128 %mul, %2
273 %sub = fsub fp128 0xL00000000000000008000000000000000, %add
274 store fp128 %sub, fp128* %res, align 16
; qpFmsub: *res = a - (b * c). The fsub carries both 'contract' and 'nsz',
; which allows the a - b*c form to be selected as xsnmsubqp on P9 (CHECK).
; P8 (CHECK-P8): __mulkf3 followed by __subkf3, with a preserved in v31
; across the multiply libcall.
278 ; Function Attrs: norecurse nounwind
279 define void @qpFmsub(fp128* nocapture readonly %a,
280 ; CHECK-LABEL: qpFmsub:
281 ; CHECK: # %bb.0: # %entry
282 ; CHECK-NEXT: lxv v2, 0(r3)
283 ; CHECK-NEXT: lxv v3, 0(r4)
284 ; CHECK-NEXT: lxv v4, 0(r5)
285 ; CHECK-NEXT: xsnmsubqp v2, v3, v4
286 ; CHECK-NEXT: stxv v2, 0(r6)
289 ; CHECK-P8-LABEL: qpFmsub:
290 ; CHECK-P8: # %bb.0: # %entry
291 ; CHECK-P8-NEXT: mflr r0
292 ; CHECK-P8-NEXT: std r0, 16(r1)
293 ; CHECK-P8-NEXT: stdu r1, -80(r1)
294 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 80
295 ; CHECK-P8-NEXT: .cfi_offset lr, 16
296 ; CHECK-P8-NEXT: .cfi_offset r30, -16
297 ; CHECK-P8-NEXT: .cfi_offset v31, -32
298 ; CHECK-P8-NEXT: li r7, 48
299 ; CHECK-P8-NEXT: lvx v2, 0, r4
300 ; CHECK-P8-NEXT: lvx v3, 0, r5
301 ; CHECK-P8-NEXT: std r30, 64(r1) # 8-byte Folded Spill
302 ; CHECK-P8-NEXT: mr r30, r6
303 ; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill
304 ; CHECK-P8-NEXT: lvx v31, 0, r3
305 ; CHECK-P8-NEXT: bl __mulkf3
307 ; CHECK-P8-NEXT: vmr v3, v2
308 ; CHECK-P8-NEXT: vmr v2, v31
309 ; CHECK-P8-NEXT: bl __subkf3
311 ; CHECK-P8-NEXT: li r3, 48
312 ; CHECK-P8-NEXT: stvx v2, 0, r30
313 ; CHECK-P8-NEXT: ld r30, 64(r1) # 8-byte Folded Reload
314 ; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
315 ; CHECK-P8-NEXT: addi r1, r1, 80
316 ; CHECK-P8-NEXT: ld r0, 16(r1)
317 ; CHECK-P8-NEXT: mtlr r0
; IR body: mul = b*c, sub = a - mul with 'contract nsz'.
319 fp128* nocapture readonly %b,
320 fp128* nocapture readonly %c, fp128* nocapture %res) {
322 %0 = load fp128, fp128* %a, align 16
323 %1 = load fp128, fp128* %b, align 16
324 %2 = load fp128, fp128* %c, align 16
325 %mul = fmul contract fp128 %1, %2
326 %sub = fsub contract nsz fp128 %0, %mul
327 store fp128 %sub, fp128* %res, align 16
; qpFmsub_02: *res = (a * b) - c, contracted into xsmsubqp on P9 (CHECK).
; P8 (CHECK-P8): __mulkf3 then __subkf3; c's pointer is held in r29 and the
; value re-loaded after the multiply, avoiding a vector register spill.
331 ; Function Attrs: norecurse nounwind
332 define void @qpFmsub_02(fp128* nocapture readonly %a,
333 ; CHECK-LABEL: qpFmsub_02:
334 ; CHECK: # %bb.0: # %entry
335 ; CHECK-NEXT: lxv v2, 0(r3)
336 ; CHECK-NEXT: lxv v3, 0(r4)
337 ; CHECK-NEXT: lxv v4, 0(r5)
338 ; CHECK-NEXT: xsmsubqp v4, v2, v3
339 ; CHECK-NEXT: stxv v4, 0(r6)
342 ; CHECK-P8-LABEL: qpFmsub_02:
343 ; CHECK-P8: # %bb.0: # %entry
344 ; CHECK-P8-NEXT: mflr r0
345 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
346 ; CHECK-P8-NEXT: .cfi_offset lr, 16
347 ; CHECK-P8-NEXT: .cfi_offset r29, -24
348 ; CHECK-P8-NEXT: .cfi_offset r30, -16
349 ; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
350 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
351 ; CHECK-P8-NEXT: std r0, 16(r1)
352 ; CHECK-P8-NEXT: stdu r1, -64(r1)
353 ; CHECK-P8-NEXT: lvx v2, 0, r3
354 ; CHECK-P8-NEXT: lvx v3, 0, r4
355 ; CHECK-P8-NEXT: mr r30, r6
356 ; CHECK-P8-NEXT: mr r29, r5
357 ; CHECK-P8-NEXT: bl __mulkf3
359 ; CHECK-P8-NEXT: lvx v3, 0, r29
360 ; CHECK-P8-NEXT: bl __subkf3
362 ; CHECK-P8-NEXT: stvx v2, 0, r30
363 ; CHECK-P8-NEXT: addi r1, r1, 64
364 ; CHECK-P8-NEXT: ld r0, 16(r1)
365 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
366 ; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
367 ; CHECK-P8-NEXT: mtlr r0
; IR body: mul = a*b, sub = mul - c (both 'contract').
369 fp128* nocapture readonly %b,
370 fp128* nocapture readonly %c, fp128* nocapture %res) {
372 %0 = load fp128, fp128* %a, align 16
373 %1 = load fp128, fp128* %b, align 16
374 %mul = fmul contract fp128 %0, %1
375 %2 = load fp128, fp128* %c, align 16
376 %sub = fsub contract fp128 %mul, %2
377 store fp128 %sub, fp128* %res, align 16
; qpFnmsub: *res = -(a - b*c). Unlike qpFmsub, the inner fsub lacks 'nsz',
; so P9 (CHECK) cannot use a plain msub form: it emits xsnegqp on b first
; and then xsnmaddqp, preserving signed-zero semantics.
; P8 (CHECK-P8): __mulkf3/__subkf3, then the outer negation is the in-memory
; sign-bit flip (lbz/xori 128/stb on the high byte of the stored fp128).
381 ; Function Attrs: norecurse nounwind
382 define void @qpFnmsub(fp128* nocapture readonly %a,
383 ; CHECK-LABEL: qpFnmsub:
384 ; CHECK: # %bb.0: # %entry
385 ; CHECK-NEXT: lxv v3, 0(r4)
386 ; CHECK-NEXT: lxv v2, 0(r3)
387 ; CHECK-NEXT: lxv v4, 0(r5)
388 ; CHECK-NEXT: xsnegqp v3, v3
389 ; CHECK-NEXT: xsnmaddqp v2, v3, v4
390 ; CHECK-NEXT: stxv v2, 0(r6)
393 ; CHECK-P8-LABEL: qpFnmsub:
394 ; CHECK-P8: # %bb.0: # %entry
395 ; CHECK-P8-NEXT: mflr r0
396 ; CHECK-P8-NEXT: std r0, 16(r1)
397 ; CHECK-P8-NEXT: stdu r1, -96(r1)
398 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 96
399 ; CHECK-P8-NEXT: .cfi_offset lr, 16
400 ; CHECK-P8-NEXT: .cfi_offset r30, -16
401 ; CHECK-P8-NEXT: .cfi_offset v31, -32
402 ; CHECK-P8-NEXT: li r7, 64
403 ; CHECK-P8-NEXT: lvx v2, 0, r4
404 ; CHECK-P8-NEXT: lvx v3, 0, r5
405 ; CHECK-P8-NEXT: std r30, 80(r1) # 8-byte Folded Spill
406 ; CHECK-P8-NEXT: mr r30, r6
407 ; CHECK-P8-NEXT: stvx v31, r1, r7 # 16-byte Folded Spill
408 ; CHECK-P8-NEXT: lvx v31, 0, r3
409 ; CHECK-P8-NEXT: bl __mulkf3
411 ; CHECK-P8-NEXT: vmr v3, v2
412 ; CHECK-P8-NEXT: vmr v2, v31
413 ; CHECK-P8-NEXT: bl __subkf3
415 ; CHECK-P8-NEXT: addi r3, r1, 48
416 ; CHECK-P8-NEXT: stvx v2, 0, r3
417 ; CHECK-P8-NEXT: lbz r4, 63(r1)
418 ; CHECK-P8-NEXT: xori r4, r4, 128
419 ; CHECK-P8-NEXT: stb r4, 63(r1)
420 ; CHECK-P8-NEXT: lvx v2, 0, r3
421 ; CHECK-P8-NEXT: li r3, 64
422 ; CHECK-P8-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload
423 ; CHECK-P8-NEXT: stvx v2, 0, r30
424 ; CHECK-P8-NEXT: ld r30, 80(r1) # 8-byte Folded Reload
425 ; CHECK-P8-NEXT: addi r1, r1, 96
426 ; CHECK-P8-NEXT: ld r0, 16(r1)
427 ; CHECK-P8-NEXT: mtlr r0
; IR body: sub = a - b*c ('contract' only, no 'nsz'), then unflagged negate.
429 fp128* nocapture readonly %b,
430 fp128* nocapture readonly %c, fp128* nocapture %res) {
432 %0 = load fp128, fp128* %a, align 16
433 %1 = load fp128, fp128* %b, align 16
434 %2 = load fp128, fp128* %c, align 16
435 %mul = fmul contract fp128 %1, %2
436 %sub = fsub contract fp128 %0, %mul
437 %sub1 = fsub fp128 0xL00000000000000008000000000000000, %sub
438 store fp128 %sub1, fp128* %res, align 16
; qpFnmsub_02: *res = -((a*b) - c), selected directly as xsnmsubqp on P9
; (CHECK) with the subtrahend c in the accumulator (v4).
; P8 (CHECK-P8): __mulkf3/__subkf3 with c re-loaded via r29, then the
; in-memory sign-bit flip for the outer negation.
442 ; Function Attrs: norecurse nounwind
443 define void @qpFnmsub_02(fp128* nocapture readonly %a,
444 ; CHECK-LABEL: qpFnmsub_02:
445 ; CHECK: # %bb.0: # %entry
446 ; CHECK-NEXT: lxv v2, 0(r3)
447 ; CHECK-NEXT: lxv v3, 0(r4)
448 ; CHECK-NEXT: lxv v4, 0(r5)
449 ; CHECK-NEXT: xsnmsubqp v4, v2, v3
450 ; CHECK-NEXT: stxv v4, 0(r6)
453 ; CHECK-P8-LABEL: qpFnmsub_02:
454 ; CHECK-P8: # %bb.0: # %entry
455 ; CHECK-P8-NEXT: mflr r0
456 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 80
457 ; CHECK-P8-NEXT: .cfi_offset lr, 16
458 ; CHECK-P8-NEXT: .cfi_offset r29, -24
459 ; CHECK-P8-NEXT: .cfi_offset r30, -16
460 ; CHECK-P8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
461 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
462 ; CHECK-P8-NEXT: std r0, 16(r1)
463 ; CHECK-P8-NEXT: stdu r1, -80(r1)
464 ; CHECK-P8-NEXT: lvx v2, 0, r3
465 ; CHECK-P8-NEXT: lvx v3, 0, r4
466 ; CHECK-P8-NEXT: mr r30, r6
467 ; CHECK-P8-NEXT: mr r29, r5
468 ; CHECK-P8-NEXT: bl __mulkf3
470 ; CHECK-P8-NEXT: lvx v3, 0, r29
471 ; CHECK-P8-NEXT: bl __subkf3
473 ; CHECK-P8-NEXT: addi r3, r1, 32
474 ; CHECK-P8-NEXT: stvx v2, 0, r3
475 ; CHECK-P8-NEXT: lbz r4, 47(r1)
476 ; CHECK-P8-NEXT: xori r4, r4, 128
477 ; CHECK-P8-NEXT: stb r4, 47(r1)
478 ; CHECK-P8-NEXT: lvx v2, 0, r3
479 ; CHECK-P8-NEXT: stvx v2, 0, r30
480 ; CHECK-P8-NEXT: addi r1, r1, 80
481 ; CHECK-P8-NEXT: ld r0, 16(r1)
482 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
483 ; CHECK-P8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
484 ; CHECK-P8-NEXT: mtlr r0
; IR body: sub = a*b - c ('contract'), then unflagged negate.
486 fp128* nocapture readonly %b,
487 fp128* nocapture readonly %c, fp128* nocapture %res) {
489 %0 = load fp128, fp128* %a, align 16
490 %1 = load fp128, fp128* %b, align 16
491 %mul = fmul contract fp128 %0, %1
492 %2 = load fp128, fp128* %c, align 16
493 %sub = fsub contract fp128 %mul, %2
494 %sub1 = fsub fp128 0xL00000000000000008000000000000000, %sub
495 store fp128 %sub1, fp128* %res, align 16