; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s \
; RUN: -check-prefix=CHECK-P8

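; This file exercises codegen for IEEE quad-precision (fp128) operations on
; powerpc64le: the pwr9 run expects the native VSX quad-precision
; instructions, while the pwr8 run expects soft-float/libcall lowering.
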
; Function Attrs: norecurse nounwind
define dso_local void @qpAdd(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpAdd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsaddqp v2, v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpAdd:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stvx v2, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %add = fadd fp128 %0, %0
  store fp128 %add, fp128* %res, align 16
  ret void
}

; Function Attrs: norecurse nounwind
define dso_local void @qpSub(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpSub:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xssubqp v2, v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpSub:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: bl __subkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stvx v2, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %sub = fsub fp128 %0, %0
  store fp128 %sub, fp128* %res, align 16
  ret void
}

; Function Attrs: norecurse nounwind
define dso_local void @qpMul(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpMul:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsmulqp v2, v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpMul:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stvx v2, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %mul = fmul fp128 %0, %0
  store fp128 %mul, fp128* %res, align 16
  ret void
}

; Function Attrs: norecurse nounwind
define dso_local void @qpDiv(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpDiv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsdivqp v2, v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpDiv:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: bl __divkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stvx v2, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %div = fdiv fp128 %0, %0
  store fp128 %div, fp128* %res, align 16
  ret void
}

define dso_local void @testLdNSt(i8* nocapture readonly %PtrC, fp128* nocapture %PtrF) {
; CHECK-LABEL: testLdNSt:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li r5, 4
; CHECK-NEXT: lxvx vs0, r3, r5
; CHECK-NEXT: li r3, 8
; CHECK-NEXT: stxvx vs0, r4, r3
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: testLdNSt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addi r3, r3, 4
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: addi r3, r4, 8
; CHECK-P8-NEXT: stvx v2, 0, r3
; CHECK-P8-NEXT: blr
entry:
  %add.ptr = getelementptr inbounds i8, i8* %PtrC, i64 4
  %0 = bitcast i8* %add.ptr to fp128*
  %1 = load fp128, fp128* %0, align 16
  %2 = bitcast fp128* %PtrF to i8*
  %add.ptr1 = getelementptr inbounds i8, i8* %2, i64 8
  %3 = bitcast i8* %add.ptr1 to fp128*
  store fp128 %1, fp128* %3, align 16
  ret void
}

define dso_local void @qpSqrt(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpSqrt:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xssqrtqp v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpSqrt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: bl sqrtf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stvx v2, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.sqrt.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}

declare fp128 @llvm.sqrt.f128(fp128 %Val)

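; The sign-manipulation tests below (copysign, fabs, negation) should select
; the quad-precision sign instructions on pwr9; the pwr8 sequences instead
; spill to a stack slot and update the sign bit of the top byte with integer
; operations.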
define dso_local void @qpCpsgn(fp128* nocapture readonly %a, fp128* nocapture readonly %b,
; CHECK-LABEL: qpCpsgn:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: xscpsgnqp v2, v3, v2
; CHECK-NEXT: stxv v2, 0(r5)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpCpsgn:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: addi r3, r1, -16
; CHECK-P8-NEXT: addi r4, r1, -32
; CHECK-P8-NEXT: stvx v3, 0, r3
; CHECK-P8-NEXT: stvx v2, 0, r4
; CHECK-P8-NEXT: lbz r3, -1(r1)
; CHECK-P8-NEXT: lbz r6, -17(r1)
; CHECK-P8-NEXT: rlwimi r6, r3, 0, 0, 24
; CHECK-P8-NEXT: stb r6, -17(r1)
; CHECK-P8-NEXT: lvx v2, 0, r4
; CHECK-P8-NEXT: stvx v2, 0, r5
; CHECK-P8-NEXT: blr
fp128* nocapture %res) {
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.copysign.f128(fp128 %0, fp128 %1)
  store fp128 %2, fp128* %res, align 16
  ret void
}

declare fp128 @llvm.copysign.f128(fp128 %Mag, fp128 %Sgn)

define dso_local void @qpAbs(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpAbs:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsabsqp v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpAbs:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: addi r3, r1, -16
; CHECK-P8-NEXT: stvx v2, 0, r3
; CHECK-P8-NEXT: lbz r5, -1(r1)
; CHECK-P8-NEXT: clrlwi r5, r5, 25
; CHECK-P8-NEXT: stb r5, -1(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: stvx v2, 0, r4
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.fabs.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}

declare fp128 @llvm.fabs.f128(fp128 %Val)

define dso_local void @qpNAbs(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpNAbs:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsnabsqp v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpNAbs:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: addi r3, r1, -32
; CHECK-P8-NEXT: stvx v2, 0, r3
; CHECK-P8-NEXT: lbz r5, -17(r1)
; CHECK-P8-NEXT: clrlwi r5, r5, 25
; CHECK-P8-NEXT: stb r5, -17(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: addi r3, r1, -16
; CHECK-P8-NEXT: stvx v2, 0, r3
; CHECK-P8-NEXT: lbz r5, -1(r1)
; CHECK-P8-NEXT: xori r5, r5, 128
; CHECK-P8-NEXT: stb r5, -1(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: stvx v2, 0, r4
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.fabs.f128(fp128 %0)
  %neg = fsub fp128 0xL00000000000000008000000000000000, %1
  store fp128 %neg, fp128* %res, align 16
  ret void
}

define dso_local void @qpNeg(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpNeg:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsnegqp v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpNeg:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: addi r3, r1, -16
; CHECK-P8-NEXT: stvx v2, 0, r3
; CHECK-P8-NEXT: lbz r5, -1(r1)
; CHECK-P8-NEXT: xori r5, r5, 128
; CHECK-P8-NEXT: stb r5, -1(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: stvx v2, 0, r4
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %sub = fsub fp128 0xL00000000000000008000000000000000, %0
  store fp128 %sub, fp128* %res, align 16
  ret void
}

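; The math intrinsics below have no dedicated quad-precision instructions, so
; both the pwr9 and pwr8 runs expect calls to the f128 library routines
; (sinf128, cosf128, logf128, fminf128, __powikf2, fmodf128, and so on).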
define fp128 @qp_sin(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_sin:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: bl sinf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_sin:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: bl sinf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.sin.f128(fp128 %0)
  ret fp128 %1
}

declare fp128 @llvm.sin.f128(fp128 %Val)

define fp128 @qp_cos(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_cos:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: bl cosf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_cos:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: bl cosf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.cos.f128(fp128 %0)
  ret fp128 %1
}

declare fp128 @llvm.cos.f128(fp128 %Val)

define fp128 @qp_log(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_log:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: bl logf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_log:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: bl logf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.log.f128(fp128 %0)
  ret fp128 %1
}

declare fp128 @llvm.log.f128(fp128 %Val)

define fp128 @qp_log10(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_log10:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: bl log10f128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_log10:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: bl log10f128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.log10.f128(fp128 %0)
  ret fp128 %1
}

declare fp128 @llvm.log10.f128(fp128 %Val)

define fp128 @qp_log2(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_log2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: bl log2f128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_log2:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: bl log2f128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.log2.f128(fp128 %0)
  ret fp128 %1
}

declare fp128 @llvm.log2.f128(fp128 %Val)

define fp128 @qp_minnum(fp128* nocapture readonly %a,
; CHECK-LABEL: qp_minnum:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: bl fminf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_minnum:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: bl fminf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
fp128* nocapture readonly %b) {
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.minnum.f128(fp128 %0, fp128 %1)
  ret fp128 %2
}

declare fp128 @llvm.minnum.f128(fp128 %Val0, fp128 %Val1)

define fp128 @qp_maxnum(fp128* nocapture readonly %a,
; CHECK-LABEL: qp_maxnum:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: bl fmaxf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_maxnum:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: bl fmaxf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
fp128* nocapture readonly %b) {
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.maxnum.f128(fp128 %0, fp128 %1)
  ret fp128 %2
}

declare fp128 @llvm.maxnum.f128(fp128 %Val0, fp128 %Val1)

define fp128 @qp_pow(fp128* nocapture readonly %a,
; CHECK-LABEL: qp_pow:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: bl powf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_pow:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: bl powf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
fp128* nocapture readonly %b) {
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.pow.f128(fp128 %0, fp128 %1)
  ret fp128 %2
}

declare fp128 @llvm.pow.f128(fp128 %Val, fp128 %Power)

define fp128 @qp_exp(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_exp:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: bl expf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_exp:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: bl expf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.exp.f128(fp128 %0)
  ret fp128 %1
}

declare fp128 @llvm.exp.f128(fp128 %Val)

define fp128 @qp_exp2(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_exp2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: bl exp2f128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_exp2:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: bl exp2f128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.exp2.f128(fp128 %0)
  ret fp128 %1
}

declare fp128 @llvm.exp2.f128(fp128 %Val)

define dso_local void @qp_powi(fp128* nocapture readonly %a, i32* nocapture readonly %b,
; CHECK-LABEL: qp_powi:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: mr r30, r5
; CHECK-NEXT: lwz r5, 0(r4)
; CHECK-NEXT: bl __powikf2
; CHECK-NEXT: nop
; CHECK-NEXT: stxv v2, 0(r30)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_powi:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: lwz r3, 0(r4)
; CHECK-P8-NEXT: mr r30, r5
; CHECK-P8-NEXT: mr r5, r3
; CHECK-P8-NEXT: bl __powikf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stvx v2, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
fp128* nocapture %res) {
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load i32, i32* %b, align 8
  %2 = tail call fp128 @llvm.powi.f128.i32(fp128 %0, i32 %1)
  store fp128 %2, fp128* %res, align 16
  ret void
}

declare fp128 @llvm.powi.f128.i32(fp128 %Val, i32 %power)

@a = common dso_local global fp128 0xL00000000000000000000000000000000, align 16
@b = common dso_local global fp128 0xL00000000000000000000000000000000, align 16

define fp128 @qp_frem() #0 {
; CHECK-LABEL: qp_frem:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: addis r3, r2, a@toc@ha
; CHECK-NEXT: addi r3, r3, a@toc@l
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: addis r3, r2, b@toc@ha
; CHECK-NEXT: addi r3, r3, b@toc@l
; CHECK-NEXT: lxv v3, 0(r3)
; CHECK-NEXT: bl fmodf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_frem:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: addis r3, r2, a@toc@ha
; CHECK-P8-NEXT: addis r4, r2, b@toc@ha
; CHECK-P8-NEXT: addi r3, r3, a@toc@l
; CHECK-P8-NEXT: addi r4, r4, b@toc@l
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: bl fmodf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* @a, align 16
  %1 = load fp128, fp128* @b, align 16
  %rem = frem fp128 %0, %1
  ret fp128 %rem
}

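; The remaining tests cover the round-to-integral intrinsics and fma:
; ceil/floor/trunc/round/rint/nearbyint should select xsrqpi/xsrqpix on pwr9
; and fma should select xsmaddqp, while lround/llround/lrint/llrint and all
; of the pwr8 paths fall back to the corresponding f128 library calls.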
define dso_local void @qpCeil(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpCeil:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsrqpi 1, v2, v2, 2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpCeil:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: bl ceilf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stvx v2, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.ceil.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}

declare fp128 @llvm.ceil.f128(fp128 %Val)

define dso_local void @qpFloor(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpFloor:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsrqpi 1, v2, v2, 3
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpFloor:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: bl floorf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stvx v2, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.floor.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}

declare fp128 @llvm.floor.f128(fp128 %Val)

define dso_local void @qpTrunc(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpTrunc:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsrqpi 1, v2, v2, 1
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpTrunc:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: bl truncf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stvx v2, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.trunc.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}

declare fp128 @llvm.trunc.f128(fp128 %Val)

define dso_local void @qpRound(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpRound:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsrqpi 0, v2, v2, 0
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpRound:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: bl roundf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stvx v2, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.round.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}

declare fp128 @llvm.round.f128(fp128 %Val)

define dso_local void @qpLRound(fp128* nocapture readonly %a, i32* nocapture %res) {
; CHECK-LABEL: qpLRound:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: mr r30, r4
; CHECK-NEXT: bl lroundf128
; CHECK-NEXT: nop
; CHECK-NEXT: stw r3, 0(r30)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpLRound:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: bl lroundf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stw r3, 0(r30)
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call i32 @llvm.lround.f128(fp128 %0)
  store i32 %1, i32* %res, align 16
  ret void
}

declare i32 @llvm.lround.f128(fp128 %Val)

define dso_local void @qpLLRound(fp128* nocapture readonly %a, i64* nocapture %res) {
; CHECK-LABEL: qpLLRound:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: mr r30, r4
; CHECK-NEXT: bl llroundf128
; CHECK-NEXT: nop
; CHECK-NEXT: std r3, 0(r30)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpLLRound:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: bl llroundf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: std r3, 0(r30)
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call i64 @llvm.llround.f128(fp128 %0)
  store i64 %1, i64* %res, align 16
  ret void
}

declare i64 @llvm.llround.f128(fp128 %Val)

define dso_local void @qpRint(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpRint:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsrqpix 0, v2, v2, 3
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpRint:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: bl rintf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stvx v2, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.rint.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}

declare fp128 @llvm.rint.f128(fp128 %Val)

define dso_local void @qpLRint(fp128* nocapture readonly %a, i32* nocapture %res) {
; CHECK-LABEL: qpLRint:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: mr r30, r4
; CHECK-NEXT: bl lrintf128
; CHECK-NEXT: nop
; CHECK-NEXT: stw r3, 0(r30)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpLRint:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: bl lrintf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stw r3, 0(r30)
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call i32 @llvm.lrint.f128(fp128 %0)
  store i32 %1, i32* %res, align 16
  ret void
}

declare i32 @llvm.lrint.f128(fp128 %Val)

define dso_local void @qpLLRint(fp128* nocapture readonly %a, i64* nocapture %res) {
; CHECK-LABEL: qpLLRint:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: mr r30, r4
; CHECK-NEXT: bl llrintf128
; CHECK-NEXT: nop
; CHECK-NEXT: std r3, 0(r30)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpLLRint:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: bl llrintf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: std r3, 0(r30)
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call i64 @llvm.llrint.f128(fp128 %0)
  store i64 %1, i64* %res, align 16
  ret void
}

declare i64 @llvm.llrint.f128(fp128 %Val)

define dso_local void @qpNearByInt(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpNearByInt:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsrqpi 0, v2, v2, 3
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpNearByInt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: bl nearbyintf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stvx v2, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.nearbyint.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}

declare fp128 @llvm.nearbyint.f128(fp128 %Val)

define dso_local void @qpFMA(fp128* %a, fp128* %b, fp128* %c, fp128* %res) {
; CHECK-LABEL: qpFMA:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: lxv v4, 0(r5)
; CHECK-NEXT: xsmaddqp v4, v2, v3
; CHECK-NEXT: stxv v4, 0(r6)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpFMA:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lvx v2, 0, r3
; CHECK-P8-NEXT: lvx v3, 0, r4
; CHECK-P8-NEXT: lvx v4, 0, r5
; CHECK-P8-NEXT: mr r30, r6
; CHECK-P8-NEXT: bl fmaf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stvx v2, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = load fp128, fp128* %c, align 16
  %3 = tail call fp128 @llvm.fma.f128(fp128 %0, fp128 %1, fp128 %2)
  store fp128 %3, fp128* %res, align 16
  ret void
}

declare fp128 @llvm.fma.f128(fp128, fp128, fp128)