; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux \
; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr | FileCheck \
; RUN:   --check-prefix=P8 %s
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux \
; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr | FileCheck \
; RUN:   --check-prefix=P9 %s

; FIXME: Constrained fpext would fail if VSX feature disabled. Add no-vsx
; Constrained FP intrinsics under test. The ceil/floor/round/trunc family
; takes only the exception-behavior metadata; nearbyint/fptrunc also take
; the rounding-mode metadata.
declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metadata)
declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata)

declare float @llvm.experimental.constrained.floor.f32(float, metadata)
declare double @llvm.experimental.constrained.floor.f64(double, metadata)
declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, metadata)
declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata)

declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)

declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)

declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)

declare float @llvm.experimental.constrained.round.f32(float, metadata)
declare double @llvm.experimental.constrained.round.f64(double, metadata)
declare <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float>, metadata)
declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata)

declare float @llvm.experimental.constrained.trunc.f32(float, metadata)
declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata)
declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata)
; ceil lowers to a single xsrdpip (round toward +inf) on both CPUs.
define float @ceil_f32(float %f1) strictfp {
; P8-LABEL: ceil_f32:
; P8:       # %bb.0:
; P8-NEXT:    xsrdpip f1, f1
; P8-NEXT:    blr
;
; P9-LABEL: ceil_f32:
; P9:       # %bb.0:
; P9-NEXT:    xsrdpip f1, f1
; P9-NEXT:    blr
  %res = call float @llvm.experimental.constrained.ceil.f32(
                        float %f1,
                        metadata !"fpexcept.strict")
  ret float %res
}
define double @ceil_f64(double %f1) strictfp {
; P8-LABEL: ceil_f64:
; P8:       # %bb.0:
; P8-NEXT:    xsrdpip f1, f1
; P8-NEXT:    blr
;
; P9-LABEL: ceil_f64:
; P9:       # %bb.0:
; P9-NEXT:    xsrdpip f1, f1
; P9-NEXT:    blr
  %res = call double @llvm.experimental.constrained.ceil.f64(
                        double %f1,
                        metadata !"fpexcept.strict")
  ret double %res
}
define <4 x float> @ceil_v4f32(<4 x float> %vf1) strictfp {
; P8-LABEL: ceil_v4f32:
; P8:       # %bb.0:
; P8-NEXT:    xvrspip v2, v2
; P8-NEXT:    blr
;
; P9-LABEL: ceil_v4f32:
; P9:       # %bb.0:
; P9-NEXT:    xvrspip v2, v2
; P9-NEXT:    blr
  %res = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(
                        <4 x float> %vf1,
                        metadata !"fpexcept.strict")
  ret <4 x float> %res
}
define <2 x double> @ceil_v2f64(<2 x double> %vf1) strictfp {
; P8-LABEL: ceil_v2f64:
; P8:       # %bb.0:
; P8-NEXT:    xvrdpip v2, v2
; P8-NEXT:    blr
;
; P9-LABEL: ceil_v2f64:
; P9:       # %bb.0:
; P9-NEXT:    xvrdpip v2, v2
; P9-NEXT:    blr
  %res = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
                        <2 x double> %vf1,
                        metadata !"fpexcept.strict")
  ret <2 x double> %res
}
; floor lowers to a single xsrdpim (round toward -inf) on both CPUs.
define float @floor_f32(float %f1) strictfp {
; P8-LABEL: floor_f32:
; P8:       # %bb.0:
; P8-NEXT:    xsrdpim f1, f1
; P8-NEXT:    blr
;
; P9-LABEL: floor_f32:
; P9:       # %bb.0:
; P9-NEXT:    xsrdpim f1, f1
; P9-NEXT:    blr
  %res = call float @llvm.experimental.constrained.floor.f32(
                        float %f1,
                        metadata !"fpexcept.strict")
  ret float %res
}
define double @floor_f64(double %f1) strictfp {
; P8-LABEL: floor_f64:
; P8:       # %bb.0:
; P8-NEXT:    xsrdpim f1, f1
; P8-NEXT:    blr
;
; P9-LABEL: floor_f64:
; P9:       # %bb.0:
; P9-NEXT:    xsrdpim f1, f1
; P9-NEXT:    blr
  %res = call double @llvm.experimental.constrained.floor.f64(
                        double %f1,
                        metadata !"fpexcept.strict")
  ret double %res
}
define <4 x float> @floor_v4f32(<4 x float> %vf1) strictfp {
; P8-LABEL: floor_v4f32:
; P8:       # %bb.0:
; P8-NEXT:    xvrspim v2, v2
; P8-NEXT:    blr
;
; P9-LABEL: floor_v4f32:
; P9:       # %bb.0:
; P9-NEXT:    xvrspim v2, v2
; P9-NEXT:    blr
  %res = call <4 x float> @llvm.experimental.constrained.floor.v4f32(
                        <4 x float> %vf1,
                        metadata !"fpexcept.strict")
  ret <4 x float> %res;
}
define <2 x double> @floor_v2f64(<2 x double> %vf1) strictfp {
; P8-LABEL: floor_v2f64:
; P8:       # %bb.0:
; P8-NEXT:    xvrdpim v2, v2
; P8-NEXT:    blr
;
; P9-LABEL: floor_v2f64:
; P9:       # %bb.0:
; P9-NEXT:    xvrdpim v2, v2
; P9-NEXT:    blr
  %res = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
                        <2 x double> %vf1,
                        metadata !"fpexcept.strict")
  ret <2 x double> %res;
}
; nearbyint is not lowered to an inline rounding instruction here; it is
; expanded to a call to libm nearbyint (bl nearbyint + nop TOC-restore slot).
define double @nearbyint_f64(double %f1, double %f2) strictfp {
; P8-LABEL: nearbyint_f64:
; P8:       # %bb.0:
; P8-NEXT:    mflr r0
; P8-NEXT:    stdu r1, -112(r1)
; P8-NEXT:    std r0, 128(r1)
; P8-NEXT:    .cfi_def_cfa_offset 112
; P8-NEXT:    .cfi_offset lr, 16
; P8-NEXT:    bl nearbyint
; P8-NEXT:    nop
; P8-NEXT:    addi r1, r1, 112
; P8-NEXT:    ld r0, 16(r1)
; P8-NEXT:    mtlr r0
; P8-NEXT:    blr
;
; P9-LABEL: nearbyint_f64:
; P9:       # %bb.0:
; P9-NEXT:    mflr r0
; P9-NEXT:    stdu r1, -32(r1)
; P9-NEXT:    std r0, 48(r1)
; P9-NEXT:    .cfi_def_cfa_offset 32
; P9-NEXT:    .cfi_offset lr, 16
; P9-NEXT:    bl nearbyint
; P9-NEXT:    nop
; P9-NEXT:    addi r1, r1, 32
; P9-NEXT:    ld r0, 16(r1)
; P9-NEXT:    mtlr r0
; P9-NEXT:    blr
  %res = call double @llvm.experimental.constrained.nearbyint.f64(
                        double %f1,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict")
  ret double %res
}
; Vector nearbyint is scalarized into four libm nearbyintf calls; v29-v31
; are spilled as scratch to hold partial results across the calls.
define <4 x float> @nearbyint_v4f32(<4 x float> %vf1, <4 x float> %vf2) strictfp {
; P8-LABEL: nearbyint_v4f32:
; P8:       # %bb.0:
; P8-NEXT:    mflr r0
; P8-NEXT:    stdu r1, -176(r1)
; P8-NEXT:    std r0, 192(r1)
; P8-NEXT:    .cfi_def_cfa_offset 176
; P8-NEXT:    .cfi_offset lr, 16
; P8-NEXT:    .cfi_offset v29, -48
; P8-NEXT:    .cfi_offset v30, -32
; P8-NEXT:    .cfi_offset v31, -16
; P8-NEXT:    xxsldwi vs0, v2, v2, 3
; P8-NEXT:    li r3, 128
; P8-NEXT:    xscvspdpn f1, vs0
; P8-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
; P8-NEXT:    li r3, 144
; P8-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
; P8-NEXT:    li r3, 160
; P8-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
; P8-NEXT:    vmr v31, v2
; P8-NEXT:    bl nearbyintf
; P8-NEXT:    nop
; P8-NEXT:    xxsldwi vs0, v31, v31, 1
; P8-NEXT:    xxlor v30, f1, f1
; P8-NEXT:    xscvspdpn f1, vs0
; P8-NEXT:    bl nearbyintf
; P8-NEXT:    nop
; P8-NEXT:    xxmrghd vs0, vs1, v30
; P8-NEXT:    xscvspdpn f1, v31
; P8-NEXT:    xvcvdpsp v29, vs0
; P8-NEXT:    bl nearbyintf
; P8-NEXT:    nop
; P8-NEXT:    xxswapd vs0, v31
; P8-NEXT:    xxlor v30, f1, f1
; P8-NEXT:    xscvspdpn f1, vs0
; P8-NEXT:    bl nearbyintf
; P8-NEXT:    nop
; P8-NEXT:    xxmrghd vs0, v30, vs1
; P8-NEXT:    li r3, 160
; P8-NEXT:    xvcvdpsp v2, vs0
; P8-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
; P8-NEXT:    li r3, 144
; P8-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
; P8-NEXT:    li r3, 128
; P8-NEXT:    vmrgew v2, v2, v29
; P8-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
; P8-NEXT:    addi r1, r1, 176
; P8-NEXT:    ld r0, 16(r1)
; P8-NEXT:    mtlr r0
; P8-NEXT:    blr
;
; P9-LABEL: nearbyint_v4f32:
; P9:       # %bb.0:
; P9-NEXT:    mflr r0
; P9-NEXT:    stdu r1, -80(r1)
; P9-NEXT:    std r0, 96(r1)
; P9-NEXT:    .cfi_def_cfa_offset 80
; P9-NEXT:    .cfi_offset lr, 16
; P9-NEXT:    .cfi_offset v29, -48
; P9-NEXT:    .cfi_offset v30, -32
; P9-NEXT:    .cfi_offset v31, -16
; P9-NEXT:    xxsldwi vs0, v2, v2, 3
; P9-NEXT:    stxv v29, 32(r1) # 16-byte Folded Spill
; P9-NEXT:    xscvspdpn f1, vs0
; P9-NEXT:    stxv v30, 48(r1) # 16-byte Folded Spill
; P9-NEXT:    stxv v31, 64(r1) # 16-byte Folded Spill
; P9-NEXT:    vmr v31, v2
; P9-NEXT:    bl nearbyintf
; P9-NEXT:    nop
; P9-NEXT:    xxsldwi vs0, v31, v31, 1
; P9-NEXT:    xscpsgndp v30, f1, f1
; P9-NEXT:    xscvspdpn f1, vs0
; P9-NEXT:    bl nearbyintf
; P9-NEXT:    nop
; P9-NEXT:    xxmrghd vs0, vs1, v30
; P9-NEXT:    xscvspdpn f1, v31
; P9-NEXT:    xvcvdpsp v29, vs0
; P9-NEXT:    bl nearbyintf
; P9-NEXT:    nop
; P9-NEXT:    xxswapd vs0, v31
; P9-NEXT:    xscpsgndp v30, f1, f1
; P9-NEXT:    xscvspdpn f1, vs0
; P9-NEXT:    bl nearbyintf
; P9-NEXT:    nop
; P9-NEXT:    xxmrghd vs0, v30, vs1
; P9-NEXT:    lxv v31, 64(r1) # 16-byte Folded Reload
; P9-NEXT:    lxv v30, 48(r1) # 16-byte Folded Reload
; P9-NEXT:    xvcvdpsp v2, vs0
; P9-NEXT:    vmrgew v2, v2, v29
; P9-NEXT:    lxv v29, 32(r1) # 16-byte Folded Reload
; P9-NEXT:    addi r1, r1, 80
; P9-NEXT:    ld r0, 16(r1)
; P9-NEXT:    mtlr r0
; P9-NEXT:    blr
  %res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(
                        <4 x float> %vf1,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict")
  ret <4 x float> %res
}
; Scalarized into two libm nearbyint calls; v30/v31 hold the input and the
; first lane's result across the calls.
define <2 x double> @nearbyint_v2f64(<2 x double> %vf1, <2 x double> %vf2) strictfp {
; P8-LABEL: nearbyint_v2f64:
; P8:       # %bb.0:
; P8-NEXT:    mflr r0
; P8-NEXT:    stdu r1, -160(r1)
; P8-NEXT:    std r0, 176(r1)
; P8-NEXT:    .cfi_def_cfa_offset 160
; P8-NEXT:    .cfi_offset lr, 16
; P8-NEXT:    .cfi_offset v30, -32
; P8-NEXT:    .cfi_offset v31, -16
; P8-NEXT:    li r3, 128
; P8-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
; P8-NEXT:    li r3, 144
; P8-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
; P8-NEXT:    vmr v31, v2
; P8-NEXT:    xxlor f1, v31, v31
; P8-NEXT:    bl nearbyint
; P8-NEXT:    nop
; P8-NEXT:    xxlor v30, f1, f1
; P8-NEXT:    xxswapd vs1, v31
; P8-NEXT:    bl nearbyint
; P8-NEXT:    nop
; P8-NEXT:    li r3, 144
; P8-NEXT:    xxmrghd v2, v30, vs1
; P8-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
; P8-NEXT:    li r3, 128
; P8-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
; P8-NEXT:    addi r1, r1, 160
; P8-NEXT:    ld r0, 16(r1)
; P8-NEXT:    mtlr r0
; P8-NEXT:    blr
;
; P9-LABEL: nearbyint_v2f64:
; P9:       # %bb.0:
; P9-NEXT:    mflr r0
; P9-NEXT:    stdu r1, -64(r1)
; P9-NEXT:    std r0, 80(r1)
; P9-NEXT:    .cfi_def_cfa_offset 64
; P9-NEXT:    .cfi_offset lr, 16
; P9-NEXT:    .cfi_offset v30, -32
; P9-NEXT:    .cfi_offset v31, -16
; P9-NEXT:    stxv v31, 48(r1) # 16-byte Folded Spill
; P9-NEXT:    vmr v31, v2
; P9-NEXT:    xscpsgndp f1, v31, v31
; P9-NEXT:    stxv v30, 32(r1) # 16-byte Folded Spill
; P9-NEXT:    bl nearbyint
; P9-NEXT:    nop
; P9-NEXT:    xscpsgndp v30, f1, f1
; P9-NEXT:    xxswapd vs1, v31
; P9-NEXT:    bl nearbyint
; P9-NEXT:    nop
; P9-NEXT:    xxmrghd v2, v30, vs1
; P9-NEXT:    lxv v31, 48(r1) # 16-byte Folded Reload
; P9-NEXT:    lxv v30, 32(r1) # 16-byte Folded Reload
; P9-NEXT:    addi r1, r1, 64
; P9-NEXT:    ld r0, 16(r1)
; P9-NEXT:    mtlr r0
; P9-NEXT:    blr
  %res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
                        <2 x double> %vf1,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict")
  ret <2 x double> %res
}
; Widening fpext is exception-free, so it lowers inline via xscvspdpn lane
; extracts plus xxmrghd merges (no libm call).
define <4 x double> @fpext_v4f64_v4f32(<4 x float> %vf1) strictfp {
; P8-LABEL: fpext_v4f64_v4f32:
; P8:       # %bb.0:
; P8-NEXT:    xxsldwi vs0, v2, v2, 1
; P8-NEXT:    xscvspdpn f3, v2
; P8-NEXT:    xxsldwi vs1, v2, v2, 3
; P8-NEXT:    xxswapd vs2, v2
; P8-NEXT:    xscvspdpn f0, vs0
; P8-NEXT:    xxmrghd v2, vs3, vs0
; P8-NEXT:    xscvspdpn f0, vs1
; P8-NEXT:    xscvspdpn f1, vs2
; P8-NEXT:    xxmrghd v3, vs1, vs0
; P8-NEXT:    blr
;
; P9-LABEL: fpext_v4f64_v4f32:
; P9:       # %bb.0:
; P9-NEXT:    xxsldwi vs0, v2, v2, 3
; P9-NEXT:    xxswapd vs1, v2
; P9-NEXT:    xscvspdpn f0, vs0
; P9-NEXT:    xscvspdpn f1, vs1
; P9-NEXT:    xxsldwi vs2, v2, v2, 1
; P9-NEXT:    xscvspdpn f2, vs2
; P9-NEXT:    xxmrghd vs0, vs1, vs0
; P9-NEXT:    xscvspdpn f1, v2
; P9-NEXT:    xxmrghd v3, vs1, vs2
; P9-NEXT:    xxlor v2, vs0, vs0
; P9-NEXT:    blr
  %res = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
                        <4 x float> %vf1,
                        metadata !"fpexcept.strict")
  ret <4 x double> %res
}
define <2 x double> @fpext_v2f64_v2f32(<2 x float> %vf1) strictfp {
; P8-LABEL: fpext_v2f64_v2f32:
; P8:       # %bb.0:
; P8-NEXT:    xxsldwi vs0, v2, v2, 1
; P8-NEXT:    xscvspdpn f1, v2
; P8-NEXT:    xscvspdpn f0, vs0
; P8-NEXT:    xxmrghd v2, vs1, vs0
; P8-NEXT:    blr
;
; P9-LABEL: fpext_v2f64_v2f32:
; P9:       # %bb.0:
; P9-NEXT:    xxsldwi vs0, v2, v2, 3
; P9-NEXT:    xxswapd vs1, v2
; P9-NEXT:    xscvspdpn f0, vs0
; P9-NEXT:    xscvspdpn f1, vs1
; P9-NEXT:    xxmrghd v2, vs1, vs0
; P9-NEXT:    blr
  %res = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
                        <2 x float> %vf1,
                        metadata !"fpexcept.strict")
  ret <2 x double> %res
}
; fptrunc double->float lowers to a single xsrsp (round to single precision).
define float @fptrunc_f32_f64(double %f1) strictfp {
; P8-LABEL: fptrunc_f32_f64:
; P8:       # %bb.0:
; P8-NEXT:    xsrsp f1, f1
; P8-NEXT:    blr
;
; P9-LABEL: fptrunc_f32_f64:
; P9:       # %bb.0:
; P9-NEXT:    xsrsp f1, f1
; P9-NEXT:    blr
  %res = call float @llvm.experimental.constrained.fptrunc.f32.f64(
                        double %f1,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict")
  ret float %res
}
define <4 x float> @fptrunc_v4f32_v4f64(<4 x double> %vf1) strictfp {
; P8-LABEL: fptrunc_v4f32_v4f64:
; P8:       # %bb.0:
; P8-NEXT:    xxmrgld vs0, v2, v3
; P8-NEXT:    xxmrghd vs1, v2, v3
; P8-NEXT:    xvcvdpsp v2, vs0
; P8-NEXT:    xvcvdpsp v3, vs1
; P8-NEXT:    vmrgew v2, v3, v2
; P8-NEXT:    blr
;
; P9-LABEL: fptrunc_v4f32_v4f64:
; P9:       # %bb.0:
; P9-NEXT:    xxmrgld vs0, v3, v2
; P9-NEXT:    xvcvdpsp v4, vs0
; P9-NEXT:    xxmrghd vs0, v3, v2
; P9-NEXT:    xvcvdpsp v2, vs0
; P9-NEXT:    vmrgew v2, v2, v4
; P9-NEXT:    blr
  %res = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
                        <4 x double> %vf1,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict")
  ret <4 x float> %res
}
define <2 x float> @fptrunc_v2f32_v2f64(<2 x double> %vf1) strictfp {
; P8-LABEL: fptrunc_v2f32_v2f64:
; P8:       # %bb.0:
; P8-NEXT:    xxswapd vs0, v2
; P8-NEXT:    xsrsp f1, v2
; P8-NEXT:    xsrsp f0, f0
; P8-NEXT:    xscvdpspn v2, f1
; P8-NEXT:    xscvdpspn v3, f0
; P8-NEXT:    vmrgow v2, v2, v3
; P8-NEXT:    blr
;
; P9-LABEL: fptrunc_v2f32_v2f64:
; P9:       # %bb.0:
; P9-NEXT:    xxswapd vs1, v2
; P9-NEXT:    xsrsp f0, v2
; P9-NEXT:    xsrsp f1, f1
; P9-NEXT:    xscvdpspn vs0, f0
; P9-NEXT:    xscvdpspn vs1, f1
; P9-NEXT:    xxmrghw v2, vs0, vs1
; P9-NEXT:    blr
  %res = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
                        <2 x double> %vf1,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict")
  ret <2 x float> %res
}
; round lowers to a single xsrdpi (round to nearest, ties away from zero).
define float @round_f32(float %f1) strictfp {
; P8-LABEL: round_f32:
; P8:       # %bb.0:
; P8-NEXT:    xsrdpi f1, f1
; P8-NEXT:    blr
;
; P9-LABEL: round_f32:
; P9:       # %bb.0:
; P9-NEXT:    xsrdpi f1, f1
; P9-NEXT:    blr
  %res = call float @llvm.experimental.constrained.round.f32(
                        float %f1,
                        metadata !"fpexcept.strict")
  ret float %res
}
define double @round_f64(double %f1) strictfp {
; P8-LABEL: round_f64:
; P8:       # %bb.0:
; P8-NEXT:    xsrdpi f1, f1
; P8-NEXT:    blr
;
; P9-LABEL: round_f64:
; P9:       # %bb.0:
; P9-NEXT:    xsrdpi f1, f1
; P9-NEXT:    blr
  %res = call double @llvm.experimental.constrained.round.f64(
                        double %f1,
                        metadata !"fpexcept.strict")
  ret double %res
}
define <4 x float> @round_v4f32(<4 x float> %vf1) strictfp {
; P8-LABEL: round_v4f32:
; P8:       # %bb.0:
; P8-NEXT:    xvrspi v2, v2
; P8-NEXT:    blr
;
; P9-LABEL: round_v4f32:
; P9:       # %bb.0:
; P9-NEXT:    xvrspi v2, v2
; P9-NEXT:    blr
  %res = call <4 x float> @llvm.experimental.constrained.round.v4f32(
                        <4 x float> %vf1,
                        metadata !"fpexcept.strict")
  ret <4 x float> %res
}
define <2 x double> @round_v2f64(<2 x double> %vf1) strictfp {
; P8-LABEL: round_v2f64:
; P8:       # %bb.0:
; P8-NEXT:    xvrdpi v2, v2
; P8-NEXT:    blr
;
; P9-LABEL: round_v2f64:
; P9:       # %bb.0:
; P9-NEXT:    xvrdpi v2, v2
; P9-NEXT:    blr
  %res = call <2 x double> @llvm.experimental.constrained.round.v2f64(
                        <2 x double> %vf1,
                        metadata !"fpexcept.strict")
  ret <2 x double> %res
}
; trunc lowers to a single xsrdpiz (round toward zero).
define float @trunc_f32(float %f1) strictfp {
; P8-LABEL: trunc_f32:
; P8:       # %bb.0:
; P8-NEXT:    xsrdpiz f1, f1
; P8-NEXT:    blr
;
; P9-LABEL: trunc_f32:
; P9:       # %bb.0:
; P9-NEXT:    xsrdpiz f1, f1
; P9-NEXT:    blr
  %res = call float @llvm.experimental.constrained.trunc.f32(
                        float %f1,
                        metadata !"fpexcept.strict")
  ret float %res
}
define double @trunc_f64(double %f1) strictfp {
; P8-LABEL: trunc_f64:
; P8:       # %bb.0:
; P8-NEXT:    xsrdpiz f1, f1
; P8-NEXT:    blr
;
; P9-LABEL: trunc_f64:
; P9:       # %bb.0:
; P9-NEXT:    xsrdpiz f1, f1
; P9-NEXT:    blr
  %res = call double @llvm.experimental.constrained.trunc.f64(
                        double %f1,
                        metadata !"fpexcept.strict")
  ret double %res
}
define <4 x float> @trunc_v4f32(<4 x float> %vf1) strictfp {
; P8-LABEL: trunc_v4f32:
; P8:       # %bb.0:
; P8-NEXT:    xvrspiz v2, v2
; P8-NEXT:    blr
;
; P9-LABEL: trunc_v4f32:
; P9:       # %bb.0:
; P9-NEXT:    xvrspiz v2, v2
; P9-NEXT:    blr
  %res = call <4 x float> @llvm.experimental.constrained.trunc.v4f32(
                        <4 x float> %vf1,
                        metadata !"fpexcept.strict")
  ret <4 x float> %res
}
define <2 x double> @trunc_v2f64(<2 x double> %vf1) strictfp {
; P8-LABEL: trunc_v2f64:
; P8:       # %bb.0:
; P8-NEXT:    xvrdpiz v2, v2
; P8-NEXT:    blr
;
; P9-LABEL: trunc_v2f64:
; P9:       # %bb.0:
; P9-NEXT:    xvrdpiz v2, v2
; P9-NEXT:    blr
  %res = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
                        <2 x double> %vf1,
                        metadata !"fpexcept.strict")
  ret <2 x double> %res
}