1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3 ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
4 ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
5 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
6 ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
7 ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
; testMultiply: loads one 16-byte vector from each of %a and %b, passes both
; (plus the address of a local <256 x i1> stack slot) to an external helper,
; runs two xvf32gerpp accumulations on a zeroed <512 x i1> accumulator, then
; disassembles / reassembles / disassembles the accumulator and stores the four
; resulting vectors through %c.
;
; The first RUN line (powerpc64le) is verified with the default FileCheck
; prefix; the second (powerpc64) uses the BE prefix. The big-endian assertions
; contain four xxlor register copies between xxmfacc and the stores; the
; little-endian assertions have none.
;
; NOTE(review): the little-endian assertions save r0 to 16(r1) and reload it
; without any mflr/mtlr, while the big-endian assertions include both. This
; looks like lines were dropped from this copy of the file - confirm by
; regenerating with utils/update_llc_test_checks.py rather than hand-editing.
9 define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, ptr nocapture noundef writeonly %c) local_unnamed_addr #0 {
10 ; CHECK-LABEL: testMultiply:
11 ; CHECK: # %bb.0: # %entry
13 ; CHECK-NEXT: std r30, -16(r1)
14 ; CHECK-NEXT: mr r30, r1
15 ; CHECK-NEXT: std r0, 16(r1)
16 ; CHECK-NEXT: clrldi r0, r1, 59
17 ; CHECK-NEXT: subfic r0, r0, -128
18 ; CHECK-NEXT: stdux r1, r1, r0
19 ; CHECK-NEXT: stxv v30, -64(r30) # 16-byte Folded Spill
20 ; CHECK-NEXT: stxv v31, -48(r30) # 16-byte Folded Spill
21 ; CHECK-NEXT: lxv v31, 0(r3)
22 ; CHECK-NEXT: lxv v30, 0(r4)
23 ; CHECK-NEXT: addi r3, r1, 32
24 ; CHECK-NEXT: vmr v2, v31
25 ; CHECK-NEXT: vmr v3, v30
26 ; CHECK-NEXT: std r29, -24(r30) # 8-byte Folded Spill
27 ; CHECK-NEXT: mr r29, r5
28 ; CHECK-NEXT: bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_@notoc
29 ; CHECK-NEXT: xxsetaccz acc0
30 ; CHECK-NEXT: xvf32gerpp acc0, v31, v30
31 ; CHECK-NEXT: lxv v3, 32(r1)
32 ; CHECK-NEXT: lxv v2, 48(r1)
33 ; CHECK-NEXT: xvf32gerpp acc0, v3, v2
34 ; CHECK-NEXT: lxv v31, -48(r30) # 16-byte Folded Reload
35 ; CHECK-NEXT: lxv v30, -64(r30) # 16-byte Folded Reload
36 ; CHECK-NEXT: xxmfacc acc0
37 ; CHECK-NEXT: stxv vs3, 0(r29)
38 ; CHECK-NEXT: pstxv vs2, 8(r29), 0
39 ; CHECK-NEXT: stxv vs1, 16(r29)
40 ; CHECK-NEXT: pstxv vs0, 24(r29), 0
41 ; CHECK-NEXT: ld r29, -24(r30) # 8-byte Folded Reload
42 ; CHECK-NEXT: mr r1, r30
43 ; CHECK-NEXT: ld r0, 16(r1)
44 ; CHECK-NEXT: ld r30, -16(r1)
48 ; CHECK-BE-LABEL: testMultiply:
49 ; CHECK-BE: # %bb.0: # %entry
50 ; CHECK-BE-NEXT: mflr r0
51 ; CHECK-BE-NEXT: std r30, -16(r1)
52 ; CHECK-BE-NEXT: mr r30, r1
53 ; CHECK-BE-NEXT: std r0, 16(r1)
54 ; CHECK-BE-NEXT: clrldi r0, r1, 59
55 ; CHECK-BE-NEXT: subfic r0, r0, -224
56 ; CHECK-BE-NEXT: stdux r1, r1, r0
57 ; CHECK-BE-NEXT: stxv v30, -64(r30) # 16-byte Folded Spill
58 ; CHECK-BE-NEXT: stxv v31, -48(r30) # 16-byte Folded Spill
59 ; CHECK-BE-NEXT: lxv v31, 0(r3)
60 ; CHECK-BE-NEXT: lxv v30, 0(r4)
61 ; CHECK-BE-NEXT: addi r3, r1, 128
62 ; CHECK-BE-NEXT: vmr v2, v31
63 ; CHECK-BE-NEXT: vmr v3, v30
64 ; CHECK-BE-NEXT: std r29, -24(r30) # 8-byte Folded Spill
65 ; CHECK-BE-NEXT: mr r29, r5
66 ; CHECK-BE-NEXT: bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_
68 ; CHECK-BE-NEXT: xxsetaccz acc1
69 ; CHECK-BE-NEXT: xvf32gerpp acc1, v31, v30
70 ; CHECK-BE-NEXT: lxv v3, 144(r1)
71 ; CHECK-BE-NEXT: lxv v2, 128(r1)
72 ; CHECK-BE-NEXT: xvf32gerpp acc1, v2, v3
73 ; CHECK-BE-NEXT: lxv v31, -48(r30) # 16-byte Folded Reload
74 ; CHECK-BE-NEXT: lxv v30, -64(r30) # 16-byte Folded Reload
75 ; CHECK-BE-NEXT: xxmfacc acc1
76 ; CHECK-BE-NEXT: xxlor vs1, vs6, vs6
77 ; CHECK-BE-NEXT: xxlor vs0, vs7, vs7
78 ; CHECK-BE-NEXT: xxlor vs3, vs4, vs4
79 ; CHECK-BE-NEXT: xxlor vs2, vs5, vs5
80 ; CHECK-BE-NEXT: stxv vs0, 0(r29)
81 ; CHECK-BE-NEXT: pstxv vs1, 8(r29), 0
82 ; CHECK-BE-NEXT: stxv vs2, 16(r29)
83 ; CHECK-BE-NEXT: pstxv vs3, 24(r29), 0
84 ; CHECK-BE-NEXT: ld r29, -24(r30) # 8-byte Folded Reload
85 ; CHECK-BE-NEXT: mr r1, r30
86 ; CHECK-BE-NEXT: ld r0, 16(r1)
87 ; CHECK-BE-NEXT: ld r30, -16(r1)
88 ; CHECK-BE-NEXT: mtlr r0
; 32-byte-aligned stack slot holding one <256 x i1> value; the lifetime markers
; below cover 32 bytes, i.e. the whole slot.
91 %vP = alloca <256 x i1>, align 32
92 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %vP)
; Create the accumulator value before the call so that it is live across it.
93 %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
94 %1 = load <16 x i8>, ptr %a, align 16
95 %2 = load <16 x i8>, ptr %b, align 16
; External helper receives the slot address and both vectors. Presumably it
; writes the vector pair into %vP (the slot is only read after this call) -
; its body is not visible here.
96 call void @_Z15buildVectorPairPu13__vector_pairDv16_hS0_(ptr noundef nonnull %vP, <16 x i8> noundef %1, <16 x i8> noundef %2)
; First accumulation: accumulator %0 with the two vectors loaded from %a / %b.
97 %3 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %1, <16 x i8> %2)
; Read the pair back from the stack slot and split it into its two halves.
98 %4 = load <256 x i1>, ptr %vP, align 32
99 %5 = call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %4)
100 %6 = extractvalue { <16 x i8>, <16 x i8> } %5, 0
101 %7 = extractvalue { <16 x i8>, <16 x i8> } %5, 1
; Second accumulation: result of the first one with the two pair halves.
102 %8 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %3, <16 x i8> %6, <16 x i8> %7)
; Split the accumulator into its four 16-byte vectors ...
103 %9 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %8)
104 %10 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %9, 0
105 %11 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %9, 1
106 %12 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %9, 2
107 %13 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %9, 3
; ... rebuild an accumulator from them in reversed order (3, 2, 1, 0) ...
108 %14 = call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %13, <16 x i8> %12, <16 x i8> %11, <16 x i8> %10)
; ... and split it again; these four vectors are what gets stored.
109 %15 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %14)
110 %16 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %15, 0
111 %17 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %15, 1
112 %18 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %15, 2
113 %19 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %15, 3
; Store the four vectors at float indices 0, 2, 4 and 6 from %c, i.e. byte
; offsets 0, 8, 16 and 24 (matching the store displacements asserted above).
; NOTE(review): each store writes 16 bytes but consecutive destinations are
; only 8 bytes apart, so the stores overlap, and `align 16` cannot hold for
; both %c and %c+8. Confirm this is intentional for the test before relying
; on the stored values.
114 store <16 x i8> %16, ptr %c, align 16
115 %add.ptr = getelementptr inbounds float, ptr %c, i64 2
116 store <16 x i8> %17, ptr %add.ptr, align 16
117 %add.ptr11 = getelementptr inbounds float, ptr %c, i64 4
118 store <16 x i8> %18, ptr %add.ptr11, align 16
119 %add.ptr13 = getelementptr inbounds float, ptr %c, i64 6
120 store <16 x i8> %19, ptr %add.ptr13, align 16
121 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %vP)
; Declarations for everything @testMultiply calls.
; Lifetime markers used around the <256 x i1> stack slot.
125 declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
; Takes no operands and returns a <512 x i1> accumulator value.
126 declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
; External (non-intrinsic) helper with a C++-mangled name; takes a pointer and
; two 16-byte vectors. Its definition is not part of this file.
127 declare void @_Z15buildVectorPairPu13__vector_pairDv16_hS0_(ptr noundef, <16 x i8> noundef, <16 x i8> noundef) local_unnamed_addr
; Takes an accumulator plus two 16-byte vectors and returns a new accumulator.
128 declare <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>)
; Splits a <256 x i1> pair into two 16-byte vectors.
129 declare { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1>)
; Splits a <512 x i1> accumulator into four 16-byte vectors.
130 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)
; Builds a <512 x i1> accumulator from four 16-byte vectors.
131 declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
132 declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
; Attribute group #0 is the one referenced by the @testMultiply definition.
134 attributes #0 = { nounwind }