1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \
3 ; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
4 ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \
5 ; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s \
6 ; RUN: -check-prefix=CHECK-P8
; qpAdd: loads an fp128 from %a, adds it to itself with fadd, and stores the
; sum to %res. The -mcpu=pwr9 run expects a single xsaddqp between lxv/stxv.
; The -mcpu=pwr8 run expects a call to __addkf3 with the loaded value copied
; into both v2 and v3, and %res kept in r30 across the call.
8 ; Function Attrs: norecurse nounwind
9 define dso_local void @qpAdd(ptr nocapture readonly %a, ptr nocapture %res) {
11 ; CHECK: # %bb.0: # %entry
12 ; CHECK-NEXT: lxv v2, 0(r3)
13 ; CHECK-NEXT: xsaddqp v2, v2, v2
14 ; CHECK-NEXT: stxv v2, 0(r4)
17 ; CHECK-P8-LABEL: qpAdd:
18 ; CHECK-P8: # %bb.0: # %entry
19 ; CHECK-P8-NEXT: mflr r0
20 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
21 ; CHECK-P8-NEXT: .cfi_offset lr, 16
22 ; CHECK-P8-NEXT: .cfi_offset r30, -16
23 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
24 ; CHECK-P8-NEXT: stdu r1, -48(r1)
25 ; CHECK-P8-NEXT: std r0, 64(r1)
26 ; CHECK-P8-NEXT: mr r30, r4
27 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
28 ; CHECK-P8-NEXT: xxswapd v2, vs0
29 ; CHECK-P8-NEXT: vmr v3, v2
30 ; CHECK-P8-NEXT: bl __addkf3
32 ; CHECK-P8-NEXT: xxswapd vs0, v2
33 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
34 ; CHECK-P8-NEXT: addi r1, r1, 48
35 ; CHECK-P8-NEXT: ld r0, 16(r1)
36 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
37 ; CHECK-P8-NEXT: mtlr r0
40 %0 = load fp128, ptr %a, align 16
41 %add = fadd fp128 %0, %0
42 store fp128 %add, ptr %res, align 16
; qpSub: loads an fp128 from %a, subtracts it from itself with fsub, and
; stores the difference to %res. The -mcpu=pwr9 run expects a single xssubqp.
; The -mcpu=pwr8 run expects a call to __subkf3 with the value in both v2 and
; v3, and %res kept in r30 across the call.
46 ; Function Attrs: norecurse nounwind
47 define dso_local void @qpSub(ptr nocapture readonly %a, ptr nocapture %res) {
49 ; CHECK: # %bb.0: # %entry
50 ; CHECK-NEXT: lxv v2, 0(r3)
51 ; CHECK-NEXT: xssubqp v2, v2, v2
52 ; CHECK-NEXT: stxv v2, 0(r4)
55 ; CHECK-P8-LABEL: qpSub:
56 ; CHECK-P8: # %bb.0: # %entry
57 ; CHECK-P8-NEXT: mflr r0
58 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
59 ; CHECK-P8-NEXT: .cfi_offset lr, 16
60 ; CHECK-P8-NEXT: .cfi_offset r30, -16
61 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
62 ; CHECK-P8-NEXT: stdu r1, -48(r1)
63 ; CHECK-P8-NEXT: std r0, 64(r1)
64 ; CHECK-P8-NEXT: mr r30, r4
65 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
66 ; CHECK-P8-NEXT: xxswapd v2, vs0
67 ; CHECK-P8-NEXT: vmr v3, v2
68 ; CHECK-P8-NEXT: bl __subkf3
70 ; CHECK-P8-NEXT: xxswapd vs0, v2
71 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
72 ; CHECK-P8-NEXT: addi r1, r1, 48
73 ; CHECK-P8-NEXT: ld r0, 16(r1)
74 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
75 ; CHECK-P8-NEXT: mtlr r0
78 %0 = load fp128, ptr %a, align 16
79 %sub = fsub fp128 %0, %0
80 store fp128 %sub, ptr %res, align 16
; qpMul: loads an fp128 from %a, multiplies it by itself with fmul, and stores
; the product to %res. The -mcpu=pwr9 run expects a single xsmulqp. The
; -mcpu=pwr8 run expects a call to __mulkf3 with the value in both v2 and v3,
; and %res kept in r30 across the call.
84 ; Function Attrs: norecurse nounwind
85 define dso_local void @qpMul(ptr nocapture readonly %a, ptr nocapture %res) {
87 ; CHECK: # %bb.0: # %entry
88 ; CHECK-NEXT: lxv v2, 0(r3)
89 ; CHECK-NEXT: xsmulqp v2, v2, v2
90 ; CHECK-NEXT: stxv v2, 0(r4)
93 ; CHECK-P8-LABEL: qpMul:
94 ; CHECK-P8: # %bb.0: # %entry
95 ; CHECK-P8-NEXT: mflr r0
96 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
97 ; CHECK-P8-NEXT: .cfi_offset lr, 16
98 ; CHECK-P8-NEXT: .cfi_offset r30, -16
99 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
100 ; CHECK-P8-NEXT: stdu r1, -48(r1)
101 ; CHECK-P8-NEXT: std r0, 64(r1)
102 ; CHECK-P8-NEXT: mr r30, r4
103 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
104 ; CHECK-P8-NEXT: xxswapd v2, vs0
105 ; CHECK-P8-NEXT: vmr v3, v2
106 ; CHECK-P8-NEXT: bl __mulkf3
108 ; CHECK-P8-NEXT: xxswapd vs0, v2
109 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
110 ; CHECK-P8-NEXT: addi r1, r1, 48
111 ; CHECK-P8-NEXT: ld r0, 16(r1)
112 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
113 ; CHECK-P8-NEXT: mtlr r0
116 %0 = load fp128, ptr %a, align 16
117 %mul = fmul fp128 %0, %0
118 store fp128 %mul, ptr %res, align 16
; qpDiv: loads an fp128 from %a, divides it by itself with fdiv, and stores
; the quotient to %res. The -mcpu=pwr9 run expects a single xsdivqp. The
; -mcpu=pwr8 run expects a call to __divkf3 with the value in both v2 and v3,
; and %res kept in r30 across the call.
122 ; Function Attrs: norecurse nounwind
123 define dso_local void @qpDiv(ptr nocapture readonly %a, ptr nocapture %res) {
124 ; CHECK-LABEL: qpDiv:
125 ; CHECK: # %bb.0: # %entry
126 ; CHECK-NEXT: lxv v2, 0(r3)
127 ; CHECK-NEXT: xsdivqp v2, v2, v2
128 ; CHECK-NEXT: stxv v2, 0(r4)
131 ; CHECK-P8-LABEL: qpDiv:
132 ; CHECK-P8: # %bb.0: # %entry
133 ; CHECK-P8-NEXT: mflr r0
134 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
135 ; CHECK-P8-NEXT: .cfi_offset lr, 16
136 ; CHECK-P8-NEXT: .cfi_offset r30, -16
137 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
138 ; CHECK-P8-NEXT: stdu r1, -48(r1)
139 ; CHECK-P8-NEXT: std r0, 64(r1)
140 ; CHECK-P8-NEXT: mr r30, r4
141 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
142 ; CHECK-P8-NEXT: xxswapd v2, vs0
143 ; CHECK-P8-NEXT: vmr v3, v2
144 ; CHECK-P8-NEXT: bl __divkf3
146 ; CHECK-P8-NEXT: xxswapd vs0, v2
147 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
148 ; CHECK-P8-NEXT: addi r1, r1, 48
149 ; CHECK-P8-NEXT: ld r0, 16(r1)
150 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
151 ; CHECK-P8-NEXT: mtlr r0
154 %0 = load fp128, ptr %a, align 16
155 %div = fdiv fp128 %0, %0
156 store fp128 %div, ptr %res, align 16
; testLdNSt: copies an fp128 from byte offset 4 of %PtrC to byte offset 8 of
; %PtrF with a plain load/store pair (no arithmetic). The -mcpu=pwr9 run
; expects the offsets to be materialized with li and used by the indexed
; lxvx/stxvx forms. The -mcpu=pwr8 run expects addi to form each address,
; followed by lxvd2x/stxvd2x. Neither run expects a call.
160 define dso_local void @testLdNSt(ptr nocapture readonly %PtrC, ptr nocapture %PtrF) {
161 ; CHECK-LABEL: testLdNSt:
162 ; CHECK: # %bb.0: # %entry
163 ; CHECK-NEXT: li r5, 4
164 ; CHECK-NEXT: lxvx vs0, r3, r5
165 ; CHECK-NEXT: li r3, 8
166 ; CHECK-NEXT: stxvx vs0, r4, r3
169 ; CHECK-P8-LABEL: testLdNSt:
170 ; CHECK-P8: # %bb.0: # %entry
171 ; CHECK-P8-NEXT: addi r3, r3, 4
172 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
173 ; CHECK-P8-NEXT: addi r3, r4, 8
174 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
177 %add.ptr = getelementptr inbounds i8, ptr %PtrC, i64 4
178 %0 = load fp128, ptr %add.ptr, align 16
179 %add.ptr1 = getelementptr inbounds i8, ptr %PtrF, i64 8
180 store fp128 %0, ptr %add.ptr1, align 16
; qpSqrt: applies the llvm.sqrt.f128 intrinsic to the fp128 loaded from %a and
; stores the result to %res. The -mcpu=pwr9 run expects a single xssqrtqp.
; The -mcpu=pwr8 run expects a call to sqrtf128 with %res kept in r30 across
; the call.
184 define dso_local void @qpSqrt(ptr nocapture readonly %a, ptr nocapture %res) {
185 ; CHECK-LABEL: qpSqrt:
186 ; CHECK: # %bb.0: # %entry
187 ; CHECK-NEXT: lxv v2, 0(r3)
188 ; CHECK-NEXT: xssqrtqp v2, v2
189 ; CHECK-NEXT: stxv v2, 0(r4)
192 ; CHECK-P8-LABEL: qpSqrt:
193 ; CHECK-P8: # %bb.0: # %entry
194 ; CHECK-P8-NEXT: mflr r0
195 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
196 ; CHECK-P8-NEXT: .cfi_offset lr, 16
197 ; CHECK-P8-NEXT: .cfi_offset r30, -16
198 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
199 ; CHECK-P8-NEXT: stdu r1, -48(r1)
200 ; CHECK-P8-NEXT: std r0, 64(r1)
201 ; CHECK-P8-NEXT: mr r30, r4
202 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
203 ; CHECK-P8-NEXT: xxswapd v2, vs0
204 ; CHECK-P8-NEXT: bl sqrtf128
206 ; CHECK-P8-NEXT: xxswapd vs0, v2
207 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
208 ; CHECK-P8-NEXT: addi r1, r1, 48
209 ; CHECK-P8-NEXT: ld r0, 16(r1)
210 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
211 ; CHECK-P8-NEXT: mtlr r0
214 %0 = load fp128, ptr %a, align 16
215 %1 = tail call fp128 @llvm.sqrt.f128(fp128 %0)
216 store fp128 %1, ptr %res, align 16
220 declare fp128 @llvm.sqrt.f128(fp128 %Val)
; qpCpsgn: calls llvm.copysign.f128 with the fp128 loaded from %a as the
; magnitude and the fp128 loaded from %b as the sign, then stores the result
; to %res. The -mcpu=pwr9 run expects a single xscpsgnqp. The -mcpu=pwr8 run
; expects no call: both values are written to stack slots below r1 and one
; byte is merged with lbz/rlwimi/stb before the result is reloaded and stored.
222 define dso_local void @qpCpsgn(ptr nocapture readonly %a, ptr nocapture readonly %b,
223 ; CHECK-LABEL: qpCpsgn:
224 ; CHECK: # %bb.0: # %entry
225 ; CHECK-NEXT: lxv v2, 0(r3)
226 ; CHECK-NEXT: lxv v3, 0(r4)
227 ; CHECK-NEXT: xscpsgnqp v2, v3, v2
228 ; CHECK-NEXT: stxv v2, 0(r5)
231 ; CHECK-P8-LABEL: qpCpsgn:
232 ; CHECK-P8: # %bb.0: # %entry
233 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
234 ; CHECK-P8-NEXT: addi r4, r1, -16
235 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r4
236 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
237 ; CHECK-P8-NEXT: addi r3, r1, -32
238 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
239 ; CHECK-P8-NEXT: lbz r4, -1(r1)
240 ; CHECK-P8-NEXT: lbz r6, -17(r1)
241 ; CHECK-P8-NEXT: rlwimi r6, r4, 0, 0, 24
242 ; CHECK-P8-NEXT: stb r6, -17(r1)
243 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
244 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r5
246 ptr nocapture %res) {
248 %0 = load fp128, ptr %a, align 16
249 %1 = load fp128, ptr %b, align 16
250 %2 = tail call fp128 @llvm.copysign.f128(fp128 %0, fp128 %1)
251 store fp128 %2, ptr %res, align 16
255 declare fp128 @llvm.copysign.f128(fp128 %Mag, fp128 %Sgn)
; qpAbs: applies llvm.fabs.f128 to the fp128 loaded from %a and stores the
; result to %res. The -mcpu=pwr9 run expects a single xsabsqp. The -mcpu=pwr8
; run expects no call: the value is written to a stack slot below r1, the byte
; at -1(r1) is masked with clrlwi, and the slot is reloaded and stored.
257 define dso_local void @qpAbs(ptr nocapture readonly %a, ptr nocapture %res) {
258 ; CHECK-LABEL: qpAbs:
259 ; CHECK: # %bb.0: # %entry
260 ; CHECK-NEXT: lxv v2, 0(r3)
261 ; CHECK-NEXT: xsabsqp v2, v2
262 ; CHECK-NEXT: stxv v2, 0(r4)
265 ; CHECK-P8-LABEL: qpAbs:
266 ; CHECK-P8: # %bb.0: # %entry
267 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
268 ; CHECK-P8-NEXT: addi r3, r1, -16
269 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
270 ; CHECK-P8-NEXT: lbz r5, -1(r1)
271 ; CHECK-P8-NEXT: clrlwi r5, r5, 25
272 ; CHECK-P8-NEXT: stb r5, -1(r1)
273 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
274 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r4
277 %0 = load fp128, ptr %a, align 16
278 %1 = tail call fp128 @llvm.fabs.f128(fp128 %0)
279 store fp128 %1, ptr %res, align 16
283 declare fp128 @llvm.fabs.f128(fp128 %Val)
; qpNAbs: applies llvm.fabs.f128 to the fp128 loaded from %a, subtracts that
; from the constant 0xL00000000000000008000000000000000 with fsub, and stores
; the result to %res. The -mcpu=pwr9 run expects the pair to fold into a
; single xsnabsqp. The -mcpu=pwr8 run expects no call: two stack round trips,
; the first masking a byte with clrlwi and the second flipping a byte with
; xori 128.
285 define dso_local void @qpNAbs(ptr nocapture readonly %a, ptr nocapture %res) {
286 ; CHECK-LABEL: qpNAbs:
287 ; CHECK: # %bb.0: # %entry
288 ; CHECK-NEXT: lxv v2, 0(r3)
289 ; CHECK-NEXT: xsnabsqp v2, v2
290 ; CHECK-NEXT: stxv v2, 0(r4)
293 ; CHECK-P8-LABEL: qpNAbs:
294 ; CHECK-P8: # %bb.0: # %entry
295 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
296 ; CHECK-P8-NEXT: addi r3, r1, -32
297 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
298 ; CHECK-P8-NEXT: lbz r5, -17(r1)
299 ; CHECK-P8-NEXT: clrlwi r5, r5, 25
300 ; CHECK-P8-NEXT: stb r5, -17(r1)
301 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
302 ; CHECK-P8-NEXT: addi r3, r1, -16
303 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
304 ; CHECK-P8-NEXT: lbz r5, -1(r1)
305 ; CHECK-P8-NEXT: xori r5, r5, 128
306 ; CHECK-P8-NEXT: stb r5, -1(r1)
307 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
308 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r4
311 %0 = load fp128, ptr %a, align 16
312 %1 = tail call fp128 @llvm.fabs.f128(fp128 %0)
313 %neg = fsub fp128 0xL00000000000000008000000000000000, %1
314 store fp128 %neg, ptr %res, align 16
; qpNeg: subtracts the fp128 loaded from %a from the constant
; 0xL00000000000000008000000000000000 with fsub and stores the result to
; %res. The -mcpu=pwr9 run expects a single xsnegqp. The -mcpu=pwr8 run
; expects no call: the value is written to a stack slot below r1, the byte at
; -1(r1) is flipped with xori 128, and the slot is reloaded and stored.
319 define dso_local void @qpNeg(ptr nocapture readonly %a, ptr nocapture %res) {
320 ; CHECK-LABEL: qpNeg:
321 ; CHECK: # %bb.0: # %entry
322 ; CHECK-NEXT: lxv v2, 0(r3)
323 ; CHECK-NEXT: xsnegqp v2, v2
324 ; CHECK-NEXT: stxv v2, 0(r4)
327 ; CHECK-P8-LABEL: qpNeg:
328 ; CHECK-P8: # %bb.0: # %entry
329 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
330 ; CHECK-P8-NEXT: addi r3, r1, -16
331 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
332 ; CHECK-P8-NEXT: lbz r5, -1(r1)
333 ; CHECK-P8-NEXT: xori r5, r5, 128
334 ; CHECK-P8-NEXT: stb r5, -1(r1)
335 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
336 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r4
339 %0 = load fp128, ptr %a, align 16
340 %sub = fsub fp128 0xL00000000000000008000000000000000, %0
341 store fp128 %sub, ptr %res, align 16
; qp_sin: fp128-returning function that applies llvm.sin.f128 to the fp128
; loaded from %a. Both runs expect a call to sinf128. They differ only in how
; the argument is loaded into v2: lxv for -mcpu=pwr9, lxvd2x followed by
; xxswapd for -mcpu=pwr8.
346 define fp128 @qp_sin(ptr nocapture readonly %a) {
347 ; CHECK-LABEL: qp_sin:
348 ; CHECK: # %bb.0: # %entry
349 ; CHECK-NEXT: mflr r0
350 ; CHECK-NEXT: stdu r1, -32(r1)
351 ; CHECK-NEXT: std r0, 48(r1)
352 ; CHECK-NEXT: .cfi_def_cfa_offset 32
353 ; CHECK-NEXT: .cfi_offset lr, 16
354 ; CHECK-NEXT: lxv v2, 0(r3)
355 ; CHECK-NEXT: bl sinf128
357 ; CHECK-NEXT: addi r1, r1, 32
358 ; CHECK-NEXT: ld r0, 16(r1)
359 ; CHECK-NEXT: mtlr r0
362 ; CHECK-P8-LABEL: qp_sin:
363 ; CHECK-P8: # %bb.0: # %entry
364 ; CHECK-P8-NEXT: mflr r0
365 ; CHECK-P8-NEXT: stdu r1, -32(r1)
366 ; CHECK-P8-NEXT: std r0, 48(r1)
367 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
368 ; CHECK-P8-NEXT: .cfi_offset lr, 16
369 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
370 ; CHECK-P8-NEXT: xxswapd v2, vs0
371 ; CHECK-P8-NEXT: bl sinf128
373 ; CHECK-P8-NEXT: addi r1, r1, 32
374 ; CHECK-P8-NEXT: ld r0, 16(r1)
375 ; CHECK-P8-NEXT: mtlr r0
378 %0 = load fp128, ptr %a, align 16
379 %1 = tail call fp128 @llvm.sin.f128(fp128 %0)
382 declare fp128 @llvm.sin.f128(fp128 %Val)
; qp_cos: fp128-returning function that applies llvm.cos.f128 to the fp128
; loaded from %a. Both runs expect a call to cosf128. They differ only in how
; the argument is loaded into v2: lxv for -mcpu=pwr9, lxvd2x followed by
; xxswapd for -mcpu=pwr8.
384 define fp128 @qp_cos(ptr nocapture readonly %a) {
385 ; CHECK-LABEL: qp_cos:
386 ; CHECK: # %bb.0: # %entry
387 ; CHECK-NEXT: mflr r0
388 ; CHECK-NEXT: stdu r1, -32(r1)
389 ; CHECK-NEXT: std r0, 48(r1)
390 ; CHECK-NEXT: .cfi_def_cfa_offset 32
391 ; CHECK-NEXT: .cfi_offset lr, 16
392 ; CHECK-NEXT: lxv v2, 0(r3)
393 ; CHECK-NEXT: bl cosf128
395 ; CHECK-NEXT: addi r1, r1, 32
396 ; CHECK-NEXT: ld r0, 16(r1)
397 ; CHECK-NEXT: mtlr r0
400 ; CHECK-P8-LABEL: qp_cos:
401 ; CHECK-P8: # %bb.0: # %entry
402 ; CHECK-P8-NEXT: mflr r0
403 ; CHECK-P8-NEXT: stdu r1, -32(r1)
404 ; CHECK-P8-NEXT: std r0, 48(r1)
405 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
406 ; CHECK-P8-NEXT: .cfi_offset lr, 16
407 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
408 ; CHECK-P8-NEXT: xxswapd v2, vs0
409 ; CHECK-P8-NEXT: bl cosf128
411 ; CHECK-P8-NEXT: addi r1, r1, 32
412 ; CHECK-P8-NEXT: ld r0, 16(r1)
413 ; CHECK-P8-NEXT: mtlr r0
416 %0 = load fp128, ptr %a, align 16
417 %1 = tail call fp128 @llvm.cos.f128(fp128 %0)
420 declare fp128 @llvm.cos.f128(fp128 %Val)
; qp_log: fp128-returning function that applies llvm.log.f128 to the fp128
; loaded from %a. Both runs expect a call to logf128. They differ only in how
; the argument is loaded into v2: lxv for -mcpu=pwr9, lxvd2x followed by
; xxswapd for -mcpu=pwr8.
422 define fp128 @qp_log(ptr nocapture readonly %a) {
423 ; CHECK-LABEL: qp_log:
424 ; CHECK: # %bb.0: # %entry
425 ; CHECK-NEXT: mflr r0
426 ; CHECK-NEXT: stdu r1, -32(r1)
427 ; CHECK-NEXT: std r0, 48(r1)
428 ; CHECK-NEXT: .cfi_def_cfa_offset 32
429 ; CHECK-NEXT: .cfi_offset lr, 16
430 ; CHECK-NEXT: lxv v2, 0(r3)
431 ; CHECK-NEXT: bl logf128
433 ; CHECK-NEXT: addi r1, r1, 32
434 ; CHECK-NEXT: ld r0, 16(r1)
435 ; CHECK-NEXT: mtlr r0
438 ; CHECK-P8-LABEL: qp_log:
439 ; CHECK-P8: # %bb.0: # %entry
440 ; CHECK-P8-NEXT: mflr r0
441 ; CHECK-P8-NEXT: stdu r1, -32(r1)
442 ; CHECK-P8-NEXT: std r0, 48(r1)
443 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
444 ; CHECK-P8-NEXT: .cfi_offset lr, 16
445 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
446 ; CHECK-P8-NEXT: xxswapd v2, vs0
447 ; CHECK-P8-NEXT: bl logf128
449 ; CHECK-P8-NEXT: addi r1, r1, 32
450 ; CHECK-P8-NEXT: ld r0, 16(r1)
451 ; CHECK-P8-NEXT: mtlr r0
454 %0 = load fp128, ptr %a, align 16
455 %1 = tail call fp128 @llvm.log.f128(fp128 %0)
458 declare fp128 @llvm.log.f128(fp128 %Val)
; qp_log10: fp128-returning function that applies llvm.log10.f128 to the fp128
; loaded from %a. Both runs expect a call to log10f128. They differ only in
; how the argument is loaded into v2: lxv for -mcpu=pwr9, lxvd2x followed by
; xxswapd for -mcpu=pwr8.
460 define fp128 @qp_log10(ptr nocapture readonly %a) {
461 ; CHECK-LABEL: qp_log10:
462 ; CHECK: # %bb.0: # %entry
463 ; CHECK-NEXT: mflr r0
464 ; CHECK-NEXT: stdu r1, -32(r1)
465 ; CHECK-NEXT: std r0, 48(r1)
466 ; CHECK-NEXT: .cfi_def_cfa_offset 32
467 ; CHECK-NEXT: .cfi_offset lr, 16
468 ; CHECK-NEXT: lxv v2, 0(r3)
469 ; CHECK-NEXT: bl log10f128
471 ; CHECK-NEXT: addi r1, r1, 32
472 ; CHECK-NEXT: ld r0, 16(r1)
473 ; CHECK-NEXT: mtlr r0
476 ; CHECK-P8-LABEL: qp_log10:
477 ; CHECK-P8: # %bb.0: # %entry
478 ; CHECK-P8-NEXT: mflr r0
479 ; CHECK-P8-NEXT: stdu r1, -32(r1)
480 ; CHECK-P8-NEXT: std r0, 48(r1)
481 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
482 ; CHECK-P8-NEXT: .cfi_offset lr, 16
483 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
484 ; CHECK-P8-NEXT: xxswapd v2, vs0
485 ; CHECK-P8-NEXT: bl log10f128
487 ; CHECK-P8-NEXT: addi r1, r1, 32
488 ; CHECK-P8-NEXT: ld r0, 16(r1)
489 ; CHECK-P8-NEXT: mtlr r0
492 %0 = load fp128, ptr %a, align 16
493 %1 = tail call fp128 @llvm.log10.f128(fp128 %0)
496 declare fp128 @llvm.log10.f128(fp128 %Val)
; qp_log2: fp128-returning function that applies llvm.log2.f128 to the fp128
; loaded from %a. Both runs expect a call to log2f128. They differ only in how
; the argument is loaded into v2: lxv for -mcpu=pwr9, lxvd2x followed by
; xxswapd for -mcpu=pwr8.
498 define fp128 @qp_log2(ptr nocapture readonly %a) {
499 ; CHECK-LABEL: qp_log2:
500 ; CHECK: # %bb.0: # %entry
501 ; CHECK-NEXT: mflr r0
502 ; CHECK-NEXT: stdu r1, -32(r1)
503 ; CHECK-NEXT: std r0, 48(r1)
504 ; CHECK-NEXT: .cfi_def_cfa_offset 32
505 ; CHECK-NEXT: .cfi_offset lr, 16
506 ; CHECK-NEXT: lxv v2, 0(r3)
507 ; CHECK-NEXT: bl log2f128
509 ; CHECK-NEXT: addi r1, r1, 32
510 ; CHECK-NEXT: ld r0, 16(r1)
511 ; CHECK-NEXT: mtlr r0
514 ; CHECK-P8-LABEL: qp_log2:
515 ; CHECK-P8: # %bb.0: # %entry
516 ; CHECK-P8-NEXT: mflr r0
517 ; CHECK-P8-NEXT: stdu r1, -32(r1)
518 ; CHECK-P8-NEXT: std r0, 48(r1)
519 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
520 ; CHECK-P8-NEXT: .cfi_offset lr, 16
521 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
522 ; CHECK-P8-NEXT: xxswapd v2, vs0
523 ; CHECK-P8-NEXT: bl log2f128
525 ; CHECK-P8-NEXT: addi r1, r1, 32
526 ; CHECK-P8-NEXT: ld r0, 16(r1)
527 ; CHECK-P8-NEXT: mtlr r0
530 %0 = load fp128, ptr %a, align 16
531 %1 = tail call fp128 @llvm.log2.f128(fp128 %0)
534 declare fp128 @llvm.log2.f128(fp128 %Val)
; qp_minnum: fp128-returning function that calls llvm.minnum.f128 on the
; fp128 values loaded from %a and %b. Both runs expect a call to fminf128
; with the operands in v2 and v3. The -mcpu=pwr9 run loads them with lxv; the
; -mcpu=pwr8 run loads each with lxvd2x followed by xxswapd.
536 define fp128 @qp_minnum(ptr nocapture readonly %a,
537 ; CHECK-LABEL: qp_minnum:
538 ; CHECK: # %bb.0: # %entry
539 ; CHECK-NEXT: mflr r0
540 ; CHECK-NEXT: stdu r1, -32(r1)
541 ; CHECK-NEXT: std r0, 48(r1)
542 ; CHECK-NEXT: .cfi_def_cfa_offset 32
543 ; CHECK-NEXT: .cfi_offset lr, 16
544 ; CHECK-NEXT: lxv v2, 0(r3)
545 ; CHECK-NEXT: lxv v3, 0(r4)
546 ; CHECK-NEXT: bl fminf128
548 ; CHECK-NEXT: addi r1, r1, 32
549 ; CHECK-NEXT: ld r0, 16(r1)
550 ; CHECK-NEXT: mtlr r0
553 ; CHECK-P8-LABEL: qp_minnum:
554 ; CHECK-P8: # %bb.0: # %entry
555 ; CHECK-P8-NEXT: mflr r0
556 ; CHECK-P8-NEXT: stdu r1, -32(r1)
557 ; CHECK-P8-NEXT: std r0, 48(r1)
558 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
559 ; CHECK-P8-NEXT: .cfi_offset lr, 16
560 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
561 ; CHECK-P8-NEXT: xxswapd v2, vs0
562 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
563 ; CHECK-P8-NEXT: xxswapd v3, vs0
564 ; CHECK-P8-NEXT: bl fminf128
566 ; CHECK-P8-NEXT: addi r1, r1, 32
567 ; CHECK-P8-NEXT: ld r0, 16(r1)
568 ; CHECK-P8-NEXT: mtlr r0
570 ptr nocapture readonly %b) {
572 %0 = load fp128, ptr %a, align 16
573 %1 = load fp128, ptr %b, align 16
574 %2 = tail call fp128 @llvm.minnum.f128(fp128 %0, fp128 %1)
577 declare fp128 @llvm.minnum.f128(fp128 %Val0, fp128 %Val1)
; qp_maxnum: fp128-returning function that calls llvm.maxnum.f128 on the
; fp128 values loaded from %a and %b. Both runs expect a call to fmaxf128
; with the operands in v2 and v3. The -mcpu=pwr9 run loads them with lxv; the
; -mcpu=pwr8 run loads each with lxvd2x followed by xxswapd.
579 define fp128 @qp_maxnum(ptr nocapture readonly %a,
580 ; CHECK-LABEL: qp_maxnum:
581 ; CHECK: # %bb.0: # %entry
582 ; CHECK-NEXT: mflr r0
583 ; CHECK-NEXT: stdu r1, -32(r1)
584 ; CHECK-NEXT: std r0, 48(r1)
585 ; CHECK-NEXT: .cfi_def_cfa_offset 32
586 ; CHECK-NEXT: .cfi_offset lr, 16
587 ; CHECK-NEXT: lxv v2, 0(r3)
588 ; CHECK-NEXT: lxv v3, 0(r4)
589 ; CHECK-NEXT: bl fmaxf128
591 ; CHECK-NEXT: addi r1, r1, 32
592 ; CHECK-NEXT: ld r0, 16(r1)
593 ; CHECK-NEXT: mtlr r0
596 ; CHECK-P8-LABEL: qp_maxnum:
597 ; CHECK-P8: # %bb.0: # %entry
598 ; CHECK-P8-NEXT: mflr r0
599 ; CHECK-P8-NEXT: stdu r1, -32(r1)
600 ; CHECK-P8-NEXT: std r0, 48(r1)
601 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
602 ; CHECK-P8-NEXT: .cfi_offset lr, 16
603 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
604 ; CHECK-P8-NEXT: xxswapd v2, vs0
605 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
606 ; CHECK-P8-NEXT: xxswapd v3, vs0
607 ; CHECK-P8-NEXT: bl fmaxf128
609 ; CHECK-P8-NEXT: addi r1, r1, 32
610 ; CHECK-P8-NEXT: ld r0, 16(r1)
611 ; CHECK-P8-NEXT: mtlr r0
613 ptr nocapture readonly %b) {
615 %0 = load fp128, ptr %a, align 16
616 %1 = load fp128, ptr %b, align 16
617 %2 = tail call fp128 @llvm.maxnum.f128(fp128 %0, fp128 %1)
620 declare fp128 @llvm.maxnum.f128(fp128 %Val0, fp128 %Val1)
; qp_pow: fp128-returning function that calls llvm.pow.f128 with the fp128
; loaded from %a as the base and the fp128 loaded from %b as the power. Both
; runs expect a call to powf128 with the operands in v2 and v3. The
; -mcpu=pwr9 run loads them with lxv; the -mcpu=pwr8 run loads each with
; lxvd2x followed by xxswapd.
622 define fp128 @qp_pow(ptr nocapture readonly %a,
623 ; CHECK-LABEL: qp_pow:
624 ; CHECK: # %bb.0: # %entry
625 ; CHECK-NEXT: mflr r0
626 ; CHECK-NEXT: stdu r1, -32(r1)
627 ; CHECK-NEXT: std r0, 48(r1)
628 ; CHECK-NEXT: .cfi_def_cfa_offset 32
629 ; CHECK-NEXT: .cfi_offset lr, 16
630 ; CHECK-NEXT: lxv v2, 0(r3)
631 ; CHECK-NEXT: lxv v3, 0(r4)
632 ; CHECK-NEXT: bl powf128
634 ; CHECK-NEXT: addi r1, r1, 32
635 ; CHECK-NEXT: ld r0, 16(r1)
636 ; CHECK-NEXT: mtlr r0
639 ; CHECK-P8-LABEL: qp_pow:
640 ; CHECK-P8: # %bb.0: # %entry
641 ; CHECK-P8-NEXT: mflr r0
642 ; CHECK-P8-NEXT: stdu r1, -32(r1)
643 ; CHECK-P8-NEXT: std r0, 48(r1)
644 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
645 ; CHECK-P8-NEXT: .cfi_offset lr, 16
646 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
647 ; CHECK-P8-NEXT: xxswapd v2, vs0
648 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
649 ; CHECK-P8-NEXT: xxswapd v3, vs0
650 ; CHECK-P8-NEXT: bl powf128
652 ; CHECK-P8-NEXT: addi r1, r1, 32
653 ; CHECK-P8-NEXT: ld r0, 16(r1)
654 ; CHECK-P8-NEXT: mtlr r0
656 ptr nocapture readonly %b) {
658 %0 = load fp128, ptr %a, align 16
659 %1 = load fp128, ptr %b, align 16
660 %2 = tail call fp128 @llvm.pow.f128(fp128 %0, fp128 %1)
663 declare fp128 @llvm.pow.f128(fp128 %Val, fp128 %Power)
; qp_exp: fp128-returning function that applies llvm.exp.f128 to the fp128
; loaded from %a. Both runs expect a call to expf128. They differ only in how
; the argument is loaded into v2: lxv for -mcpu=pwr9, lxvd2x followed by
; xxswapd for -mcpu=pwr8.
665 define fp128 @qp_exp(ptr nocapture readonly %a) {
666 ; CHECK-LABEL: qp_exp:
667 ; CHECK: # %bb.0: # %entry
668 ; CHECK-NEXT: mflr r0
669 ; CHECK-NEXT: stdu r1, -32(r1)
670 ; CHECK-NEXT: std r0, 48(r1)
671 ; CHECK-NEXT: .cfi_def_cfa_offset 32
672 ; CHECK-NEXT: .cfi_offset lr, 16
673 ; CHECK-NEXT: lxv v2, 0(r3)
674 ; CHECK-NEXT: bl expf128
676 ; CHECK-NEXT: addi r1, r1, 32
677 ; CHECK-NEXT: ld r0, 16(r1)
678 ; CHECK-NEXT: mtlr r0
681 ; CHECK-P8-LABEL: qp_exp:
682 ; CHECK-P8: # %bb.0: # %entry
683 ; CHECK-P8-NEXT: mflr r0
684 ; CHECK-P8-NEXT: stdu r1, -32(r1)
685 ; CHECK-P8-NEXT: std r0, 48(r1)
686 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
687 ; CHECK-P8-NEXT: .cfi_offset lr, 16
688 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
689 ; CHECK-P8-NEXT: xxswapd v2, vs0
690 ; CHECK-P8-NEXT: bl expf128
692 ; CHECK-P8-NEXT: addi r1, r1, 32
693 ; CHECK-P8-NEXT: ld r0, 16(r1)
694 ; CHECK-P8-NEXT: mtlr r0
697 %0 = load fp128, ptr %a, align 16
698 %1 = tail call fp128 @llvm.exp.f128(fp128 %0)
701 declare fp128 @llvm.exp.f128(fp128 %Val)
; qp_exp2: fp128-returning function that applies llvm.exp2.f128 to the fp128
; loaded from %a. Both runs expect a call to exp2f128. They differ only in how
; the argument is loaded into v2: lxv for -mcpu=pwr9, lxvd2x followed by
; xxswapd for -mcpu=pwr8.
703 define fp128 @qp_exp2(ptr nocapture readonly %a) {
704 ; CHECK-LABEL: qp_exp2:
705 ; CHECK: # %bb.0: # %entry
706 ; CHECK-NEXT: mflr r0
707 ; CHECK-NEXT: stdu r1, -32(r1)
708 ; CHECK-NEXT: std r0, 48(r1)
709 ; CHECK-NEXT: .cfi_def_cfa_offset 32
710 ; CHECK-NEXT: .cfi_offset lr, 16
711 ; CHECK-NEXT: lxv v2, 0(r3)
712 ; CHECK-NEXT: bl exp2f128
714 ; CHECK-NEXT: addi r1, r1, 32
715 ; CHECK-NEXT: ld r0, 16(r1)
716 ; CHECK-NEXT: mtlr r0
719 ; CHECK-P8-LABEL: qp_exp2:
720 ; CHECK-P8: # %bb.0: # %entry
721 ; CHECK-P8-NEXT: mflr r0
722 ; CHECK-P8-NEXT: stdu r1, -32(r1)
723 ; CHECK-P8-NEXT: std r0, 48(r1)
724 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
725 ; CHECK-P8-NEXT: .cfi_offset lr, 16
726 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
727 ; CHECK-P8-NEXT: xxswapd v2, vs0
728 ; CHECK-P8-NEXT: bl exp2f128
730 ; CHECK-P8-NEXT: addi r1, r1, 32
731 ; CHECK-P8-NEXT: ld r0, 16(r1)
732 ; CHECK-P8-NEXT: mtlr r0
735 %0 = load fp128, ptr %a, align 16
736 %1 = tail call fp128 @llvm.exp2.f128(fp128 %0)
739 declare fp128 @llvm.exp2.f128(fp128 %Val)
; qp_powi: calls llvm.powi.f128.i32 with the fp128 loaded from %a and the i32
; loaded from %b, then stores the result to %res. Both runs expect a call to
; __powikf2 with the exponent loaded into r5 by lwz and %res kept in r30
; across the call. The -mcpu=pwr8 run additionally wraps the vector load and
; store in xxswapd.
741 define dso_local void @qp_powi(ptr nocapture readonly %a, ptr nocapture readonly %b,
742 ; CHECK-LABEL: qp_powi:
743 ; CHECK: # %bb.0: # %entry
744 ; CHECK-NEXT: mflr r0
745 ; CHECK-NEXT: .cfi_def_cfa_offset 48
746 ; CHECK-NEXT: .cfi_offset lr, 16
747 ; CHECK-NEXT: .cfi_offset r30, -16
748 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
749 ; CHECK-NEXT: stdu r1, -48(r1)
750 ; CHECK-NEXT: std r0, 64(r1)
751 ; CHECK-NEXT: mr r30, r5
752 ; CHECK-NEXT: lxv v2, 0(r3)
753 ; CHECK-NEXT: lwz r5, 0(r4)
754 ; CHECK-NEXT: bl __powikf2
756 ; CHECK-NEXT: stxv v2, 0(r30)
757 ; CHECK-NEXT: addi r1, r1, 48
758 ; CHECK-NEXT: ld r0, 16(r1)
759 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
760 ; CHECK-NEXT: mtlr r0
763 ; CHECK-P8-LABEL: qp_powi:
764 ; CHECK-P8: # %bb.0: # %entry
765 ; CHECK-P8-NEXT: mflr r0
766 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
767 ; CHECK-P8-NEXT: .cfi_offset lr, 16
768 ; CHECK-P8-NEXT: .cfi_offset r30, -16
769 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
770 ; CHECK-P8-NEXT: stdu r1, -48(r1)
771 ; CHECK-P8-NEXT: std r0, 64(r1)
772 ; CHECK-P8-NEXT: mr r30, r5
773 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
774 ; CHECK-P8-NEXT: lwz r5, 0(r4)
775 ; CHECK-P8-NEXT: xxswapd v2, vs0
776 ; CHECK-P8-NEXT: bl __powikf2
778 ; CHECK-P8-NEXT: xxswapd vs0, v2
779 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
780 ; CHECK-P8-NEXT: addi r1, r1, 48
781 ; CHECK-P8-NEXT: ld r0, 16(r1)
782 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
783 ; CHECK-P8-NEXT: mtlr r0
785 ptr nocapture %res) {
787 %0 = load fp128, ptr %a, align 16
788 %1 = load i32, ptr %b, align 8
789 %2 = tail call fp128 @llvm.powi.f128.i32(fp128 %0, i32 %1)
790 store fp128 %2, ptr %res, align 16
793 declare fp128 @llvm.powi.f128.i32(fp128 %Val, i32 %power)
; Zero-initialized fp128 globals @a and @b, used as the operands of qp_frem.
;
; qp_frem: fp128-returning function that computes frem of the fp128 values
; loaded from the globals @a and @b. Both runs expect each global address to
; be formed with a TOC-relative addis/addi pair and expect a call to fmodf128
; with the operands in v2 and v3. The -mcpu=pwr8 run loads each operand with
; lxvd2x followed by xxswapd.
795 @a = common dso_local global fp128 0xL00000000000000000000000000000000, align 16
796 @b = common dso_local global fp128 0xL00000000000000000000000000000000, align 16
798 define fp128 @qp_frem() #0 {
799 ; CHECK-LABEL: qp_frem:
800 ; CHECK: # %bb.0: # %entry
801 ; CHECK-NEXT: mflr r0
802 ; CHECK-NEXT: stdu r1, -32(r1)
803 ; CHECK-NEXT: std r0, 48(r1)
804 ; CHECK-NEXT: .cfi_def_cfa_offset 32
805 ; CHECK-NEXT: .cfi_offset lr, 16
806 ; CHECK-NEXT: addis r3, r2, a@toc@ha
807 ; CHECK-NEXT: addi r3, r3, a@toc@l
808 ; CHECK-NEXT: lxv v2, 0(r3)
809 ; CHECK-NEXT: addis r3, r2, b@toc@ha
810 ; CHECK-NEXT: addi r3, r3, b@toc@l
811 ; CHECK-NEXT: lxv v3, 0(r3)
812 ; CHECK-NEXT: bl fmodf128
814 ; CHECK-NEXT: addi r1, r1, 32
815 ; CHECK-NEXT: ld r0, 16(r1)
816 ; CHECK-NEXT: mtlr r0
819 ; CHECK-P8-LABEL: qp_frem:
820 ; CHECK-P8: # %bb.0: # %entry
821 ; CHECK-P8-NEXT: mflr r0
822 ; CHECK-P8-NEXT: stdu r1, -32(r1)
823 ; CHECK-P8-NEXT: std r0, 48(r1)
824 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
825 ; CHECK-P8-NEXT: .cfi_offset lr, 16
826 ; CHECK-P8-NEXT: addis r3, r2, a@toc@ha
827 ; CHECK-P8-NEXT: addi r3, r3, a@toc@l
828 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
829 ; CHECK-P8-NEXT: addis r3, r2, b@toc@ha
830 ; CHECK-P8-NEXT: addi r3, r3, b@toc@l
831 ; CHECK-P8-NEXT: xxswapd v2, vs0
832 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
833 ; CHECK-P8-NEXT: xxswapd v3, vs0
834 ; CHECK-P8-NEXT: bl fmodf128
836 ; CHECK-P8-NEXT: addi r1, r1, 32
837 ; CHECK-P8-NEXT: ld r0, 16(r1)
838 ; CHECK-P8-NEXT: mtlr r0
841 %0 = load fp128, ptr @a, align 16
842 %1 = load fp128, ptr @b, align 16
843 %rem = frem fp128 %0, %1
; qpCeil: applies llvm.ceil.f128 to the fp128 loaded from %a and stores the
; result to %res. The -mcpu=pwr9 run expects "xsrqpi 1, v2, v2, 2". The
; -mcpu=pwr8 run expects a call to ceilf128 with %res kept in r30 across the
; call.
847 define dso_local void @qpCeil(ptr nocapture readonly %a, ptr nocapture %res) {
848 ; CHECK-LABEL: qpCeil:
849 ; CHECK: # %bb.0: # %entry
850 ; CHECK-NEXT: lxv v2, 0(r3)
851 ; CHECK-NEXT: xsrqpi 1, v2, v2, 2
852 ; CHECK-NEXT: stxv v2, 0(r4)
855 ; CHECK-P8-LABEL: qpCeil:
856 ; CHECK-P8: # %bb.0: # %entry
857 ; CHECK-P8-NEXT: mflr r0
858 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
859 ; CHECK-P8-NEXT: .cfi_offset lr, 16
860 ; CHECK-P8-NEXT: .cfi_offset r30, -16
861 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
862 ; CHECK-P8-NEXT: stdu r1, -48(r1)
863 ; CHECK-P8-NEXT: std r0, 64(r1)
864 ; CHECK-P8-NEXT: mr r30, r4
865 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
866 ; CHECK-P8-NEXT: xxswapd v2, vs0
867 ; CHECK-P8-NEXT: bl ceilf128
869 ; CHECK-P8-NEXT: xxswapd vs0, v2
870 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
871 ; CHECK-P8-NEXT: addi r1, r1, 48
872 ; CHECK-P8-NEXT: ld r0, 16(r1)
873 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
874 ; CHECK-P8-NEXT: mtlr r0
877 %0 = load fp128, ptr %a, align 16
878 %1 = tail call fp128 @llvm.ceil.f128(fp128 %0)
879 store fp128 %1, ptr %res, align 16
882 declare fp128 @llvm.ceil.f128(fp128 %Val)
; qpFloor: applies llvm.floor.f128 to the fp128 loaded from %a and stores the
; result to %res. The -mcpu=pwr9 run expects "xsrqpi 1, v2, v2, 3". The
; -mcpu=pwr8 run expects a call to floorf128 with %res kept in r30 across the
; call.
884 define dso_local void @qpFloor(ptr nocapture readonly %a, ptr nocapture %res) {
885 ; CHECK-LABEL: qpFloor:
886 ; CHECK: # %bb.0: # %entry
887 ; CHECK-NEXT: lxv v2, 0(r3)
888 ; CHECK-NEXT: xsrqpi 1, v2, v2, 3
889 ; CHECK-NEXT: stxv v2, 0(r4)
892 ; CHECK-P8-LABEL: qpFloor:
893 ; CHECK-P8: # %bb.0: # %entry
894 ; CHECK-P8-NEXT: mflr r0
895 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
896 ; CHECK-P8-NEXT: .cfi_offset lr, 16
897 ; CHECK-P8-NEXT: .cfi_offset r30, -16
898 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
899 ; CHECK-P8-NEXT: stdu r1, -48(r1)
900 ; CHECK-P8-NEXT: std r0, 64(r1)
901 ; CHECK-P8-NEXT: mr r30, r4
902 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
903 ; CHECK-P8-NEXT: xxswapd v2, vs0
904 ; CHECK-P8-NEXT: bl floorf128
906 ; CHECK-P8-NEXT: xxswapd vs0, v2
907 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
908 ; CHECK-P8-NEXT: addi r1, r1, 48
909 ; CHECK-P8-NEXT: ld r0, 16(r1)
910 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
911 ; CHECK-P8-NEXT: mtlr r0
914 %0 = load fp128, ptr %a, align 16
915 %1 = tail call fp128 @llvm.floor.f128(fp128 %0)
916 store fp128 %1, ptr %res, align 16
919 declare fp128 @llvm.floor.f128(fp128 %Val)
; qpTrunc: applies llvm.trunc.f128 to the fp128 loaded from %a and stores the
; result to %res. The -mcpu=pwr9 run expects "xsrqpi 1, v2, v2, 1". The
; -mcpu=pwr8 run expects a call to truncf128 with %res kept in r30 across the
; call.
921 define dso_local void @qpTrunc(ptr nocapture readonly %a, ptr nocapture %res) {
922 ; CHECK-LABEL: qpTrunc:
923 ; CHECK: # %bb.0: # %entry
924 ; CHECK-NEXT: lxv v2, 0(r3)
925 ; CHECK-NEXT: xsrqpi 1, v2, v2, 1
926 ; CHECK-NEXT: stxv v2, 0(r4)
929 ; CHECK-P8-LABEL: qpTrunc:
930 ; CHECK-P8: # %bb.0: # %entry
931 ; CHECK-P8-NEXT: mflr r0
932 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
933 ; CHECK-P8-NEXT: .cfi_offset lr, 16
934 ; CHECK-P8-NEXT: .cfi_offset r30, -16
935 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
936 ; CHECK-P8-NEXT: stdu r1, -48(r1)
937 ; CHECK-P8-NEXT: std r0, 64(r1)
938 ; CHECK-P8-NEXT: mr r30, r4
939 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
940 ; CHECK-P8-NEXT: xxswapd v2, vs0
941 ; CHECK-P8-NEXT: bl truncf128
943 ; CHECK-P8-NEXT: xxswapd vs0, v2
944 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
945 ; CHECK-P8-NEXT: addi r1, r1, 48
946 ; CHECK-P8-NEXT: ld r0, 16(r1)
947 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
948 ; CHECK-P8-NEXT: mtlr r0
951 %0 = load fp128, ptr %a, align 16
952 %1 = tail call fp128 @llvm.trunc.f128(fp128 %0)
953 store fp128 %1, ptr %res, align 16
956 declare fp128 @llvm.trunc.f128(fp128 %Val)
; qpRound: applies llvm.round.f128 to the fp128 loaded from %a and stores the
; result to %res. The -mcpu=pwr9 run expects "xsrqpi 0, v2, v2, 0". The
; -mcpu=pwr8 run expects a call to roundf128 with %res kept in r30 across the
; call.
958 define dso_local void @qpRound(ptr nocapture readonly %a, ptr nocapture %res) {
959 ; CHECK-LABEL: qpRound:
960 ; CHECK: # %bb.0: # %entry
961 ; CHECK-NEXT: lxv v2, 0(r3)
962 ; CHECK-NEXT: xsrqpi 0, v2, v2, 0
963 ; CHECK-NEXT: stxv v2, 0(r4)
966 ; CHECK-P8-LABEL: qpRound:
967 ; CHECK-P8: # %bb.0: # %entry
968 ; CHECK-P8-NEXT: mflr r0
969 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
970 ; CHECK-P8-NEXT: .cfi_offset lr, 16
971 ; CHECK-P8-NEXT: .cfi_offset r30, -16
972 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
973 ; CHECK-P8-NEXT: stdu r1, -48(r1)
974 ; CHECK-P8-NEXT: std r0, 64(r1)
975 ; CHECK-P8-NEXT: mr r30, r4
976 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
977 ; CHECK-P8-NEXT: xxswapd v2, vs0
978 ; CHECK-P8-NEXT: bl roundf128
980 ; CHECK-P8-NEXT: xxswapd vs0, v2
981 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
982 ; CHECK-P8-NEXT: addi r1, r1, 48
983 ; CHECK-P8-NEXT: ld r0, 16(r1)
984 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
985 ; CHECK-P8-NEXT: mtlr r0
988 %0 = load fp128, ptr %a, align 16
989 %1 = tail call fp128 @llvm.round.f128(fp128 %0)
990 store fp128 %1, ptr %res, align 16
993 declare fp128 @llvm.round.f128(fp128 %Val)
; qpLRound: applies llvm.lround.f128 (declared here as returning i32) to the
; fp128 loaded from %a and stores the i32 result to %res. Both runs expect a
; call to lroundf128 followed by a stw of r3 through r30, which holds %res
; across the call. The -mcpu=pwr8 run loads the argument with lxvd2x followed
; by xxswapd instead of lxv.
995 define dso_local void @qpLRound(ptr nocapture readonly %a, ptr nocapture %res) {
996 ; CHECK-LABEL: qpLRound:
997 ; CHECK: # %bb.0: # %entry
998 ; CHECK-NEXT: mflr r0
999 ; CHECK-NEXT: .cfi_def_cfa_offset 48
1000 ; CHECK-NEXT: .cfi_offset lr, 16
1001 ; CHECK-NEXT: .cfi_offset r30, -16
1002 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
1003 ; CHECK-NEXT: stdu r1, -48(r1)
1004 ; CHECK-NEXT: std r0, 64(r1)
1005 ; CHECK-NEXT: mr r30, r4
1006 ; CHECK-NEXT: lxv v2, 0(r3)
1007 ; CHECK-NEXT: bl lroundf128
1009 ; CHECK-NEXT: stw r3, 0(r30)
1010 ; CHECK-NEXT: addi r1, r1, 48
1011 ; CHECK-NEXT: ld r0, 16(r1)
1012 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
1013 ; CHECK-NEXT: mtlr r0
1016 ; CHECK-P8-LABEL: qpLRound:
1017 ; CHECK-P8: # %bb.0: # %entry
1018 ; CHECK-P8-NEXT: mflr r0
1019 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
1020 ; CHECK-P8-NEXT: .cfi_offset lr, 16
1021 ; CHECK-P8-NEXT: .cfi_offset r30, -16
1022 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
1023 ; CHECK-P8-NEXT: stdu r1, -48(r1)
1024 ; CHECK-P8-NEXT: std r0, 64(r1)
1025 ; CHECK-P8-NEXT: mr r30, r4
1026 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
1027 ; CHECK-P8-NEXT: xxswapd v2, vs0
1028 ; CHECK-P8-NEXT: bl lroundf128
1029 ; CHECK-P8-NEXT: nop
1030 ; CHECK-P8-NEXT: stw r3, 0(r30)
1031 ; CHECK-P8-NEXT: addi r1, r1, 48
1032 ; CHECK-P8-NEXT: ld r0, 16(r1)
1033 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
1034 ; CHECK-P8-NEXT: mtlr r0
1035 ; CHECK-P8-NEXT: blr
1037 %0 = load fp128, ptr %a, align 16
1038 %1 = tail call i32 @llvm.lround.f128(fp128 %0)
1039 store i32 %1, ptr %res, align 16
1042 declare i32 @llvm.lround.f128(fp128 %Val)
; qpLLRound: applies llvm.llround.f128 (returning i64) to the fp128 loaded
; from %a and stores the i64 result to %res. Both runs expect a call to
; llroundf128 followed by a std of r3 through r30, which holds %res across
; the call. The -mcpu=pwr8 run loads the argument with lxvd2x followed by
; xxswapd instead of lxv.
1044 define dso_local void @qpLLRound(ptr nocapture readonly %a, ptr nocapture %res) {
1045 ; CHECK-LABEL: qpLLRound:
1046 ; CHECK: # %bb.0: # %entry
1047 ; CHECK-NEXT: mflr r0
1048 ; CHECK-NEXT: .cfi_def_cfa_offset 48
1049 ; CHECK-NEXT: .cfi_offset lr, 16
1050 ; CHECK-NEXT: .cfi_offset r30, -16
1051 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
1052 ; CHECK-NEXT: stdu r1, -48(r1)
1053 ; CHECK-NEXT: std r0, 64(r1)
1054 ; CHECK-NEXT: mr r30, r4
1055 ; CHECK-NEXT: lxv v2, 0(r3)
1056 ; CHECK-NEXT: bl llroundf128
1058 ; CHECK-NEXT: std r3, 0(r30)
1059 ; CHECK-NEXT: addi r1, r1, 48
1060 ; CHECK-NEXT: ld r0, 16(r1)
1061 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
1062 ; CHECK-NEXT: mtlr r0
1065 ; CHECK-P8-LABEL: qpLLRound:
1066 ; CHECK-P8: # %bb.0: # %entry
1067 ; CHECK-P8-NEXT: mflr r0
1068 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
1069 ; CHECK-P8-NEXT: .cfi_offset lr, 16
1070 ; CHECK-P8-NEXT: .cfi_offset r30, -16
1071 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
1072 ; CHECK-P8-NEXT: stdu r1, -48(r1)
1073 ; CHECK-P8-NEXT: std r0, 64(r1)
1074 ; CHECK-P8-NEXT: mr r30, r4
1075 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
1076 ; CHECK-P8-NEXT: xxswapd v2, vs0
1077 ; CHECK-P8-NEXT: bl llroundf128
1078 ; CHECK-P8-NEXT: nop
1079 ; CHECK-P8-NEXT: std r3, 0(r30)
1080 ; CHECK-P8-NEXT: addi r1, r1, 48
1081 ; CHECK-P8-NEXT: ld r0, 16(r1)
1082 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
1083 ; CHECK-P8-NEXT: mtlr r0
1084 ; CHECK-P8-NEXT: blr
1086 %0 = load fp128, ptr %a, align 16
1087 %1 = tail call i64 @llvm.llround.f128(fp128 %0)
1088 store i64 %1, ptr %res, align 16
1091 declare i64 @llvm.llround.f128(fp128 %Val)
; qpRint: loads an fp128 from %a, applies llvm.rint.f128 and stores the fp128
; result to %res. The CHECK (pwr9 run) expectations show the operation done
; inline with `xsrqpix 0, v2, v2, 3`; the CHECK-P8 (pwr8 run) expectations show
; a call to rintf128 with the vector value swapped (xxswapd) around the
; lxvd2x load and stxvd2x store.
1093 define dso_local void @qpRint(ptr nocapture readonly %a, ptr nocapture %res) {
1094 ; CHECK-LABEL: qpRint:
1095 ; CHECK: # %bb.0: # %entry
1096 ; CHECK-NEXT: lxv v2, 0(r3)
1097 ; CHECK-NEXT: xsrqpix 0, v2, v2, 3
1098 ; CHECK-NEXT: stxv v2, 0(r4)
1101 ; CHECK-P8-LABEL: qpRint:
1102 ; CHECK-P8: # %bb.0: # %entry
1103 ; CHECK-P8-NEXT: mflr r0
1104 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
1105 ; CHECK-P8-NEXT: .cfi_offset lr, 16
1106 ; CHECK-P8-NEXT: .cfi_offset r30, -16
1107 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
1108 ; CHECK-P8-NEXT: stdu r1, -48(r1)
1109 ; CHECK-P8-NEXT: std r0, 64(r1)
1110 ; CHECK-P8-NEXT: mr r30, r4
1111 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
1112 ; CHECK-P8-NEXT: xxswapd v2, vs0
1113 ; CHECK-P8-NEXT: bl rintf128
1114 ; CHECK-P8-NEXT: nop
1115 ; CHECK-P8-NEXT: xxswapd vs0, v2
1116 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
1117 ; CHECK-P8-NEXT: addi r1, r1, 48
1118 ; CHECK-P8-NEXT: ld r0, 16(r1)
1119 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
1120 ; CHECK-P8-NEXT: mtlr r0
1121 ; CHECK-P8-NEXT: blr
1123 %0 = load fp128, ptr %a, align 16
1124 %1 = tail call fp128 @llvm.rint.f128(fp128 %0)
1125 store fp128 %1, ptr %res, align 16
; Declaration of the fp128 -> fp128 rint intrinsic called by @qpRint.
1128 declare fp128 @llvm.rint.f128(fp128 %Val)
; qpLRint: loads an fp128 from %a, passes it to llvm.lrint.f128 and stores the
; i32 result to %res. Both the CHECK (pwr9 run) and CHECK-P8 (pwr8 run)
; expectations show a call to lrintf128 followed by a word store (stw);
; pwr9 loads the argument with lxv, pwr8 with lxvd2x + xxswapd.
1130 define dso_local void @qpLRint(ptr nocapture readonly %a, ptr nocapture %res) {
1131 ; CHECK-LABEL: qpLRint:
1132 ; CHECK: # %bb.0: # %entry
1133 ; CHECK-NEXT: mflr r0
1134 ; CHECK-NEXT: .cfi_def_cfa_offset 48
1135 ; CHECK-NEXT: .cfi_offset lr, 16
1136 ; CHECK-NEXT: .cfi_offset r30, -16
1137 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
1138 ; CHECK-NEXT: stdu r1, -48(r1)
1139 ; CHECK-NEXT: std r0, 64(r1)
1140 ; CHECK-NEXT: mr r30, r4
1141 ; CHECK-NEXT: lxv v2, 0(r3)
1142 ; CHECK-NEXT: bl lrintf128
1144 ; CHECK-NEXT: stw r3, 0(r30)
1145 ; CHECK-NEXT: addi r1, r1, 48
1146 ; CHECK-NEXT: ld r0, 16(r1)
1147 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
1148 ; CHECK-NEXT: mtlr r0
1151 ; CHECK-P8-LABEL: qpLRint:
1152 ; CHECK-P8: # %bb.0: # %entry
1153 ; CHECK-P8-NEXT: mflr r0
1154 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
1155 ; CHECK-P8-NEXT: .cfi_offset lr, 16
1156 ; CHECK-P8-NEXT: .cfi_offset r30, -16
1157 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
1158 ; CHECK-P8-NEXT: stdu r1, -48(r1)
1159 ; CHECK-P8-NEXT: std r0, 64(r1)
1160 ; CHECK-P8-NEXT: mr r30, r4
1161 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
1162 ; CHECK-P8-NEXT: xxswapd v2, vs0
1163 ; CHECK-P8-NEXT: bl lrintf128
1164 ; CHECK-P8-NEXT: nop
1165 ; CHECK-P8-NEXT: stw r3, 0(r30)
1166 ; CHECK-P8-NEXT: addi r1, r1, 48
1167 ; CHECK-P8-NEXT: ld r0, 16(r1)
1168 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
1169 ; CHECK-P8-NEXT: mtlr r0
1170 ; CHECK-P8-NEXT: blr
1172 %0 = load fp128, ptr %a, align 16
1173 %1 = tail call i32 @llvm.lrint.f128(fp128 %0)
1174 store i32 %1, ptr %res, align 16
; Declaration of the fp128 -> i32 lrint intrinsic called by @qpLRint.
1177 declare i32 @llvm.lrint.f128(fp128 %Val)
; qpLLRint: loads an fp128 from %a, passes it to llvm.llrint.f128 and stores
; the i64 result to %res. Both the CHECK (pwr9 run) and CHECK-P8 (pwr8 run)
; expectations show a call to llrintf128 followed by a doubleword store (std);
; pwr9 loads the argument with lxv, pwr8 with lxvd2x + xxswapd.
1179 define dso_local void @qpLLRint(ptr nocapture readonly %a, ptr nocapture %res) {
1180 ; CHECK-LABEL: qpLLRint:
1181 ; CHECK: # %bb.0: # %entry
1182 ; CHECK-NEXT: mflr r0
1183 ; CHECK-NEXT: .cfi_def_cfa_offset 48
1184 ; CHECK-NEXT: .cfi_offset lr, 16
1185 ; CHECK-NEXT: .cfi_offset r30, -16
1186 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
1187 ; CHECK-NEXT: stdu r1, -48(r1)
1188 ; CHECK-NEXT: std r0, 64(r1)
1189 ; CHECK-NEXT: mr r30, r4
1190 ; CHECK-NEXT: lxv v2, 0(r3)
1191 ; CHECK-NEXT: bl llrintf128
1193 ; CHECK-NEXT: std r3, 0(r30)
1194 ; CHECK-NEXT: addi r1, r1, 48
1195 ; CHECK-NEXT: ld r0, 16(r1)
1196 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
1197 ; CHECK-NEXT: mtlr r0
1200 ; CHECK-P8-LABEL: qpLLRint:
1201 ; CHECK-P8: # %bb.0: # %entry
1202 ; CHECK-P8-NEXT: mflr r0
1203 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
1204 ; CHECK-P8-NEXT: .cfi_offset lr, 16
1205 ; CHECK-P8-NEXT: .cfi_offset r30, -16
1206 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
1207 ; CHECK-P8-NEXT: stdu r1, -48(r1)
1208 ; CHECK-P8-NEXT: std r0, 64(r1)
1209 ; CHECK-P8-NEXT: mr r30, r4
1210 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
1211 ; CHECK-P8-NEXT: xxswapd v2, vs0
1212 ; CHECK-P8-NEXT: bl llrintf128
1213 ; CHECK-P8-NEXT: nop
1214 ; CHECK-P8-NEXT: std r3, 0(r30)
1215 ; CHECK-P8-NEXT: addi r1, r1, 48
1216 ; CHECK-P8-NEXT: ld r0, 16(r1)
1217 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
1218 ; CHECK-P8-NEXT: mtlr r0
1219 ; CHECK-P8-NEXT: blr
1221 %0 = load fp128, ptr %a, align 16
1222 %1 = tail call i64 @llvm.llrint.f128(fp128 %0)
1223 store i64 %1, ptr %res, align 16
; Declaration of the fp128 -> i64 llrint intrinsic called by @qpLLRint.
1226 declare i64 @llvm.llrint.f128(fp128 %Val)
; qpNearByInt: loads an fp128 from %a, applies llvm.nearbyint.f128 and stores
; the fp128 result to %res. The CHECK (pwr9 run) expectations show the
; operation done inline with `xsrqpi 0, v2, v2, 3`; the CHECK-P8 (pwr8 run)
; expectations show a call to nearbyintf128 with the vector value swapped
; (xxswapd) around the lxvd2x load and stxvd2x store.
1228 define dso_local void @qpNearByInt(ptr nocapture readonly %a, ptr nocapture %res) {
1229 ; CHECK-LABEL: qpNearByInt:
1230 ; CHECK: # %bb.0: # %entry
1231 ; CHECK-NEXT: lxv v2, 0(r3)
1232 ; CHECK-NEXT: xsrqpi 0, v2, v2, 3
1233 ; CHECK-NEXT: stxv v2, 0(r4)
1236 ; CHECK-P8-LABEL: qpNearByInt:
1237 ; CHECK-P8: # %bb.0: # %entry
1238 ; CHECK-P8-NEXT: mflr r0
1239 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
1240 ; CHECK-P8-NEXT: .cfi_offset lr, 16
1241 ; CHECK-P8-NEXT: .cfi_offset r30, -16
1242 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
1243 ; CHECK-P8-NEXT: stdu r1, -48(r1)
1244 ; CHECK-P8-NEXT: std r0, 64(r1)
1245 ; CHECK-P8-NEXT: mr r30, r4
1246 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
1247 ; CHECK-P8-NEXT: xxswapd v2, vs0
1248 ; CHECK-P8-NEXT: bl nearbyintf128
1249 ; CHECK-P8-NEXT: nop
1250 ; CHECK-P8-NEXT: xxswapd vs0, v2
1251 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
1252 ; CHECK-P8-NEXT: addi r1, r1, 48
1253 ; CHECK-P8-NEXT: ld r0, 16(r1)
1254 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
1255 ; CHECK-P8-NEXT: mtlr r0
1256 ; CHECK-P8-NEXT: blr
1258 %0 = load fp128, ptr %a, align 16
1259 %1 = tail call fp128 @llvm.nearbyint.f128(fp128 %0)
1260 store fp128 %1, ptr %res, align 16
; Declaration of the fp128 -> fp128 nearbyint intrinsic called by @qpNearByInt.
1263 declare fp128 @llvm.nearbyint.f128(fp128 %Val)
; qpFMA: loads three fp128 values from %a, %b and %c, passes them to
; llvm.fma.f128 and stores the fp128 result to %res. The CHECK (pwr9 run)
; expectations show three lxv loads feeding a single `xsmaddqp v4, v2, v3`;
; the CHECK-P8 (pwr8 run) expectations show each operand loaded with
; lxvd2x + xxswapd into v2/v3/v4 followed by a call to fmaf128, with %res
; (r6) kept in r30 across the call.
1265 define dso_local void @qpFMA(ptr %a, ptr %b, ptr %c, ptr %res) {
1266 ; CHECK-LABEL: qpFMA:
1267 ; CHECK: # %bb.0: # %entry
1268 ; CHECK-NEXT: lxv v2, 0(r3)
1269 ; CHECK-NEXT: lxv v3, 0(r4)
1270 ; CHECK-NEXT: lxv v4, 0(r5)
1271 ; CHECK-NEXT: xsmaddqp v4, v2, v3
1272 ; CHECK-NEXT: stxv v4, 0(r6)
1275 ; CHECK-P8-LABEL: qpFMA:
1276 ; CHECK-P8: # %bb.0: # %entry
1277 ; CHECK-P8-NEXT: mflr r0
1278 ; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
1279 ; CHECK-P8-NEXT: .cfi_offset lr, 16
1280 ; CHECK-P8-NEXT: .cfi_offset r30, -16
1281 ; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
1282 ; CHECK-P8-NEXT: stdu r1, -48(r1)
1283 ; CHECK-P8-NEXT: std r0, 64(r1)
1284 ; CHECK-P8-NEXT: mr r30, r6
1285 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
1286 ; CHECK-P8-NEXT: xxswapd v2, vs0
1287 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
1288 ; CHECK-P8-NEXT: xxswapd v3, vs0
1289 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r5
1290 ; CHECK-P8-NEXT: xxswapd v4, vs0
1291 ; CHECK-P8-NEXT: bl fmaf128
1292 ; CHECK-P8-NEXT: nop
1293 ; CHECK-P8-NEXT: xxswapd vs0, v2
1294 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
1295 ; CHECK-P8-NEXT: addi r1, r1, 48
1296 ; CHECK-P8-NEXT: ld r0, 16(r1)
1297 ; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
1298 ; CHECK-P8-NEXT: mtlr r0
1299 ; CHECK-P8-NEXT: blr
1301 %0 = load fp128, ptr %a, align 16
1302 %1 = load fp128, ptr %b, align 16
1303 %2 = load fp128, ptr %c, align 16
1304 %3 = tail call fp128 @llvm.fma.f128(fp128 %0, fp128 %1, fp128 %2)
1305 store fp128 %3, ptr %res, align 16
; Declaration of the three-operand fp128 fma intrinsic called by @qpFMA.
1308 declare fp128 @llvm.fma.f128(fp128, fp128, fp128)