1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3 ; RUN: -mcpu=pwr10 -ppc-track-subreg-liveness -ppc-asm-full-reg-names \
4 ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
5 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
6 ; RUN: -mcpu=pwr10 -ppc-track-subreg-liveness -ppc-asm-full-reg-names \
7 ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
9 declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
10 declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>)
; intrinsics1: builds an accumulator with llvm.ppc.mma.assemble.acc from the
; four vector arguments (passed in the order %vc1, %vc3, %vc2, %vc4), feeds it
; through xvi4ger8pp, xvf16ger2pp, pmxvf32gerpn and pmxvf64gernp (the last one
; also takes a pair assembled from %vc4 and %vc1), and stores the <512 x i1>
; result through %ptr. The first assertion group belongs to the powerpc64le
; run, the BE-prefixed group to the powerpc64 run.
11 define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4, i8* %ptr) {
12 ; CHECK-LABEL: intrinsics1:
14 ; CHECK-NEXT: vmr v1, v4
15 ; CHECK-NEXT: vmr v4, v3
16 ; CHECK-NEXT: ld r3, 96(r1)
17 ; CHECK-NEXT: vmr v0, v2
18 ; CHECK-NEXT: xxlor vs3, v5, v5
19 ; CHECK-NEXT: xxlor vs0, v0, v0
20 ; CHECK-NEXT: xxlor vs1, v1, v1
21 ; CHECK-NEXT: xxlor vs2, v4, v4
22 ; CHECK-NEXT: xxmtacc acc0
23 ; CHECK-NEXT: xvi4ger8pp acc0, v2, v3
24 ; CHECK-NEXT: xvf16ger2pp acc0, v2, v1
25 ; CHECK-NEXT: pmxvf32gerpn acc0, v3, v5, 0, 0
26 ; CHECK-NEXT: vmr v3, v2
27 ; CHECK-NEXT: vmr v2, v5
28 ; CHECK-NEXT: pmxvf64gernp acc0, vsp34, v0, 0, 0
29 ; CHECK-NEXT: xxmfacc acc0
30 ; CHECK-NEXT: stxv vs0, 48(r3)
31 ; CHECK-NEXT: stxv vs1, 32(r3)
32 ; CHECK-NEXT: stxv vs2, 16(r3)
33 ; CHECK-NEXT: stxv vs3, 0(r3)
36 ; CHECK-BE-LABEL: intrinsics1:
38 ; CHECK-BE-NEXT: vmr v1, v4
39 ; CHECK-BE-NEXT: vmr v4, v3
40 ; CHECK-BE-NEXT: ld r3, 112(r1)
41 ; CHECK-BE-NEXT: vmr v0, v2
42 ; CHECK-BE-NEXT: xxlor vs3, v5, v5
43 ; CHECK-BE-NEXT: xxlor vs0, v0, v0
44 ; CHECK-BE-NEXT: xxlor vs1, v1, v1
45 ; CHECK-BE-NEXT: xxlor vs2, v4, v4
46 ; CHECK-BE-NEXT: xxmtacc acc0
47 ; CHECK-BE-NEXT: xvi4ger8pp acc0, v2, v3
48 ; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v1
49 ; CHECK-BE-NEXT: pmxvf32gerpn acc0, v3, v5, 0, 0
50 ; CHECK-BE-NEXT: vmr v3, v2
51 ; CHECK-BE-NEXT: vmr v2, v5
52 ; CHECK-BE-NEXT: pmxvf64gernp acc0, vsp34, v0, 0, 0
53 ; CHECK-BE-NEXT: xxmfacc acc0
54 ; CHECK-BE-NEXT: stxv vs1, 16(r3)
55 ; CHECK-BE-NEXT: stxv vs0, 0(r3)
56 ; CHECK-BE-NEXT: stxv vs3, 48(r3)
57 ; CHECK-BE-NEXT: stxv vs2, 32(r3)
59 %1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc3, <16 x i8> %vc2, <16 x i8> %vc4)
60 %2 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc2)
61 %3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3)
62 %4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %3, <16 x i8> %vc2, <16 x i8> %vc4, i32 0, i32 0)
63 %5 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %vc4, <16 x i8> %vc1)
64 %6 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %4, <256 x i1> %5, <16 x i8> %vc1, i32 0, i32 0)
65 %7 = bitcast i8* %ptr to <512 x i1>*
66 store <512 x i1> %6, <512 x i1>* %7, align 64
70 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)
; intrinsics2: loads four vectors through %ptr1..%ptr4, assembles them into an
; accumulator, feeds it through xvi8ger4pp, xvf16ger2pn, pmxvf32gernn and
; pmxvf64gernn (the last one also takes a pair assembled from %vc4 and %vc1),
; then splits the result with llvm.ppc.mma.disassemble.acc and stores the four
; pieces back through %ptr1..%ptr4.
; NOTE(review): %12 (the bitcast of %ptr) has no use in the visible body.
71 define void @intrinsics2(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <16 x i8>* %ptr4, i8* %ptr) {
72 ; CHECK-LABEL: intrinsics2:
74 ; CHECK-NEXT: lxv v2, 0(r3)
75 ; CHECK-NEXT: lxv v3, 0(r4)
76 ; CHECK-NEXT: lxv v4, 0(r5)
77 ; CHECK-NEXT: lxv v5, 0(r6)
78 ; CHECK-NEXT: xxlor vs0, v2, v2
79 ; CHECK-NEXT: xxlor vs1, v3, v3
80 ; CHECK-NEXT: xxlor vs2, v4, v4
81 ; CHECK-NEXT: xxlor vs3, v5, v5
82 ; CHECK-NEXT: vmr v1, v2
83 ; CHECK-NEXT: vmr v0, v5
84 ; CHECK-NEXT: xxmtacc acc0
85 ; CHECK-NEXT: xvi8ger4pp acc0, v2, v3
86 ; CHECK-NEXT: xvf16ger2pn acc0, v2, v4
87 ; CHECK-NEXT: pmxvf32gernn acc0, v3, v5, 0, 0
88 ; CHECK-NEXT: pmxvf64gernn acc0, vsp32, v2, 0, 0
89 ; CHECK-NEXT: xxmfacc acc0
90 ; CHECK-NEXT: stxv vs3, 0(r3)
91 ; CHECK-NEXT: stxv vs2, 0(r4)
92 ; CHECK-NEXT: stxv vs1, 0(r5)
93 ; CHECK-NEXT: stxv vs0, 0(r6)
96 ; CHECK-BE-LABEL: intrinsics2:
98 ; CHECK-BE-NEXT: lxv v2, 0(r3)
99 ; CHECK-BE-NEXT: lxv v3, 0(r4)
100 ; CHECK-BE-NEXT: lxv v4, 0(r5)
101 ; CHECK-BE-NEXT: lxv v5, 0(r6)
102 ; CHECK-BE-NEXT: xxlor vs0, v2, v2
103 ; CHECK-BE-NEXT: xxlor vs1, v3, v3
104 ; CHECK-BE-NEXT: xxlor vs2, v4, v4
105 ; CHECK-BE-NEXT: xxlor vs3, v5, v5
106 ; CHECK-BE-NEXT: vmr v1, v2
107 ; CHECK-BE-NEXT: vmr v0, v5
108 ; CHECK-BE-NEXT: xxmtacc acc0
109 ; CHECK-BE-NEXT: xvi8ger4pp acc0, v2, v3
110 ; CHECK-BE-NEXT: xvf16ger2pn acc0, v2, v4
111 ; CHECK-BE-NEXT: pmxvf32gernn acc0, v3, v5, 0, 0
112 ; CHECK-BE-NEXT: pmxvf64gernn acc0, vsp32, v2, 0, 0
113 ; CHECK-BE-NEXT: xxmfacc acc0
114 ; CHECK-BE-NEXT: stxv vs0, 0(r3)
115 ; CHECK-BE-NEXT: stxv vs1, 0(r4)
116 ; CHECK-BE-NEXT: stxv vs2, 0(r5)
117 ; CHECK-BE-NEXT: stxv vs3, 0(r6)
119 %vc1 = load <16 x i8>, <16 x i8>* %ptr1, align 16
120 %vc2 = load <16 x i8>, <16 x i8>* %ptr2, align 16
121 %vc3 = load <16 x i8>, <16 x i8>* %ptr3, align 16
122 %vc4 = load <16 x i8>, <16 x i8>* %ptr4, align 16
123 %1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4)
124 %2 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc2)
125 %3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3)
126 %4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %3, <16 x i8> %vc2, <16 x i8> %vc4, i32 0, i32 0)
127 %5 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %vc4, <16 x i8> %vc1)
128 %6 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %4, <256 x i1> %5, <16 x i8> %vc1, i32 0, i32 0)
129 %7 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %6)
130 %8 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %7, 0
131 %9 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %7, 1
132 %10 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %7, 2
133 %11 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %7, 3
134 %12 = bitcast i8* %ptr to <512 x i1>*
135 store <16 x i8> %8, <16 x i8>* %ptr1, align 16
136 store <16 x i8> %9, <16 x i8>* %ptr2, align 16
137 store <16 x i8> %10, <16 x i8>* %ptr3, align 16
138 store <16 x i8> %11, <16 x i8>* %ptr4, align 16
; test1: calls llvm.ppc.mma.xvi4ger8 with %vc as both vector operands and
; stores the <512 x i1> result through %resp (align 64). %vqp and %vpp are
; not used in the visible body.
142 define void @test1(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
143 ; CHECK-LABEL: test1:
144 ; CHECK: # %bb.0: # %entry
145 ; CHECK-NEXT: xvi4ger8 acc0, v2, v2
146 ; CHECK-NEXT: xxmfacc acc0
147 ; CHECK-NEXT: stxv vs0, 48(r7)
148 ; CHECK-NEXT: stxv vs1, 32(r7)
149 ; CHECK-NEXT: stxv vs2, 16(r7)
150 ; CHECK-NEXT: stxv vs3, 0(r7)
153 ; CHECK-BE-LABEL: test1:
154 ; CHECK-BE: # %bb.0: # %entry
155 ; CHECK-BE-NEXT: xvi4ger8 acc0, v2, v2
156 ; CHECK-BE-NEXT: xxmfacc acc0
157 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
158 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
159 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
160 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
163 %0 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> %vc, <16 x i8> %vc)
164 %1 = bitcast i8* %resp to <512 x i1>*
165 store <512 x i1> %0, <512 x i1>* %1, align 64
170 declare <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8>, <16 x i8>)
; test2: loads a <512 x i1> accumulator through %vqp, passes it together with
; %vc (as both vector operands) to llvm.ppc.mma.xvi4ger8pp, and stores the
; result through %resp (align 64). %vpp is not used in the visible body.
172 define void @test2(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
173 ; CHECK-LABEL: test2:
174 ; CHECK: # %bb.0: # %entry
175 ; CHECK-NEXT: lxv vs1, 32(r3)
176 ; CHECK-NEXT: lxv vs0, 48(r3)
177 ; CHECK-NEXT: lxv vs3, 0(r3)
178 ; CHECK-NEXT: lxv vs2, 16(r3)
179 ; CHECK-NEXT: xxmtacc acc0
180 ; CHECK-NEXT: xvi4ger8pp acc0, v2, v2
181 ; CHECK-NEXT: xxmfacc acc0
182 ; CHECK-NEXT: stxv vs0, 48(r7)
183 ; CHECK-NEXT: stxv vs1, 32(r7)
184 ; CHECK-NEXT: stxv vs2, 16(r7)
185 ; CHECK-NEXT: stxv vs3, 0(r7)
188 ; CHECK-BE-LABEL: test2:
189 ; CHECK-BE: # %bb.0: # %entry
190 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
191 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
192 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
193 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
194 ; CHECK-BE-NEXT: xxmtacc acc0
195 ; CHECK-BE-NEXT: xvi4ger8pp acc0, v2, v2
196 ; CHECK-BE-NEXT: xxmfacc acc0
197 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
198 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
199 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
200 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
203 %0 = bitcast i8* %vqp to <512 x i1>*
204 %1 = load <512 x i1>, <512 x i1>* %0, align 64
205 %2 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
206 %3 = bitcast i8* %resp to <512 x i1>*
207 store <512 x i1> %2, <512 x i1>* %3, align 64
212 declare <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>)
; test3: calls llvm.ppc.mma.pmxvi4ger8 with %vc as both vector operands and
; all three i32 operands set to 0, then stores the <512 x i1> result through
; %resp (align 64). %vqp and %vpp are not used in the visible body.
214 define void @test3(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
215 ; CHECK-LABEL: test3:
216 ; CHECK: # %bb.0: # %entry
217 ; CHECK-NEXT: pmxvi4ger8 acc0, v2, v2, 0, 0, 0
218 ; CHECK-NEXT: xxmfacc acc0
219 ; CHECK-NEXT: stxv vs0, 48(r7)
220 ; CHECK-NEXT: stxv vs1, 32(r7)
221 ; CHECK-NEXT: stxv vs2, 16(r7)
222 ; CHECK-NEXT: stxv vs3, 0(r7)
225 ; CHECK-BE-LABEL: test3:
226 ; CHECK-BE: # %bb.0: # %entry
227 ; CHECK-BE-NEXT: pmxvi4ger8 acc0, v2, v2, 0, 0, 0
228 ; CHECK-BE-NEXT: xxmfacc acc0
229 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
230 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
231 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
232 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
235 %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
236 %1 = bitcast i8* %resp to <512 x i1>*
237 store <512 x i1> %0, <512 x i1>* %1, align 64
242 declare <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8>, <16 x i8>, i32, i32, i32)
; test4: loads a <512 x i1> accumulator through %vqp, passes it with %vc (as
; both vector operands) and three zero i32 operands to
; llvm.ppc.mma.pmxvi4ger8pp, and stores the result through %resp (align 64).
; %vpp is not used in the visible body.
244 define void @test4(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
245 ; CHECK-LABEL: test4:
246 ; CHECK: # %bb.0: # %entry
247 ; CHECK-NEXT: lxv vs1, 32(r3)
248 ; CHECK-NEXT: lxv vs0, 48(r3)
249 ; CHECK-NEXT: lxv vs3, 0(r3)
250 ; CHECK-NEXT: lxv vs2, 16(r3)
251 ; CHECK-NEXT: xxmtacc acc0
252 ; CHECK-NEXT: pmxvi4ger8pp acc0, v2, v2, 0, 0, 0
253 ; CHECK-NEXT: xxmfacc acc0
254 ; CHECK-NEXT: stxv vs0, 48(r7)
255 ; CHECK-NEXT: stxv vs1, 32(r7)
256 ; CHECK-NEXT: stxv vs2, 16(r7)
257 ; CHECK-NEXT: stxv vs3, 0(r7)
260 ; CHECK-BE-LABEL: test4:
261 ; CHECK-BE: # %bb.0: # %entry
262 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
263 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
264 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
265 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
266 ; CHECK-BE-NEXT: xxmtacc acc0
267 ; CHECK-BE-NEXT: pmxvi4ger8pp acc0, v2, v2, 0, 0, 0
268 ; CHECK-BE-NEXT: xxmfacc acc0
269 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
270 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
271 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
272 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
275 %0 = bitcast i8* %vqp to <512 x i1>*
276 %1 = load <512 x i1>, <512 x i1>* %0, align 64
277 %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
278 %3 = bitcast i8* %resp to <512 x i1>*
279 store <512 x i1> %2, <512 x i1>* %3, align 64
284 declare <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
; test5: calls llvm.ppc.mma.xvi8ger4 with %vc as both vector operands and
; stores the <512 x i1> result through %resp (align 64). %vqp and %vpp are
; not used in the visible body.
286 define void @test5(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
287 ; CHECK-LABEL: test5:
288 ; CHECK: # %bb.0: # %entry
289 ; CHECK-NEXT: xvi8ger4 acc0, v2, v2
290 ; CHECK-NEXT: xxmfacc acc0
291 ; CHECK-NEXT: stxv vs0, 48(r7)
292 ; CHECK-NEXT: stxv vs1, 32(r7)
293 ; CHECK-NEXT: stxv vs2, 16(r7)
294 ; CHECK-NEXT: stxv vs3, 0(r7)
297 ; CHECK-BE-LABEL: test5:
298 ; CHECK-BE: # %bb.0: # %entry
299 ; CHECK-BE-NEXT: xvi8ger4 acc0, v2, v2
300 ; CHECK-BE-NEXT: xxmfacc acc0
301 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
302 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
303 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
304 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
307 %0 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> %vc, <16 x i8> %vc)
308 %1 = bitcast i8* %resp to <512 x i1>*
309 store <512 x i1> %0, <512 x i1>* %1, align 64
314 declare <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8>, <16 x i8>)
; test6: loads a <512 x i1> accumulator through %vqp, passes it together with
; %vc (as both vector operands) to llvm.ppc.mma.xvi8ger4pp, and stores the
; result through %resp (align 64). %vpp is not used in the visible body.
316 define void @test6(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
317 ; CHECK-LABEL: test6:
318 ; CHECK: # %bb.0: # %entry
319 ; CHECK-NEXT: lxv vs1, 32(r3)
320 ; CHECK-NEXT: lxv vs0, 48(r3)
321 ; CHECK-NEXT: lxv vs3, 0(r3)
322 ; CHECK-NEXT: lxv vs2, 16(r3)
323 ; CHECK-NEXT: xxmtacc acc0
324 ; CHECK-NEXT: xvi8ger4pp acc0, v2, v2
325 ; CHECK-NEXT: xxmfacc acc0
326 ; CHECK-NEXT: stxv vs0, 48(r7)
327 ; CHECK-NEXT: stxv vs1, 32(r7)
328 ; CHECK-NEXT: stxv vs2, 16(r7)
329 ; CHECK-NEXT: stxv vs3, 0(r7)
332 ; CHECK-BE-LABEL: test6:
333 ; CHECK-BE: # %bb.0: # %entry
334 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
335 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
336 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
337 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
338 ; CHECK-BE-NEXT: xxmtacc acc0
339 ; CHECK-BE-NEXT: xvi8ger4pp acc0, v2, v2
340 ; CHECK-BE-NEXT: xxmfacc acc0
341 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
342 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
343 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
344 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
347 %0 = bitcast i8* %vqp to <512 x i1>*
348 %1 = load <512 x i1>, <512 x i1>* %0, align 64
349 %2 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
350 %3 = bitcast i8* %resp to <512 x i1>*
351 store <512 x i1> %2, <512 x i1>* %3, align 64
356 declare <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1>, <16 x i8>, <16 x i8>)
; test7: calls llvm.ppc.mma.pmxvi8ger4 with %vc as both vector operands and
; all three i32 operands set to 0, then stores the <512 x i1> result through
; %resp (align 64). %vqp and %vpp are not used in the visible body.
358 define void @test7(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
359 ; CHECK-LABEL: test7:
360 ; CHECK: # %bb.0: # %entry
361 ; CHECK-NEXT: pmxvi8ger4 acc0, v2, v2, 0, 0, 0
362 ; CHECK-NEXT: xxmfacc acc0
363 ; CHECK-NEXT: stxv vs0, 48(r7)
364 ; CHECK-NEXT: stxv vs1, 32(r7)
365 ; CHECK-NEXT: stxv vs2, 16(r7)
366 ; CHECK-NEXT: stxv vs3, 0(r7)
369 ; CHECK-BE-LABEL: test7:
370 ; CHECK-BE: # %bb.0: # %entry
371 ; CHECK-BE-NEXT: pmxvi8ger4 acc0, v2, v2, 0, 0, 0
372 ; CHECK-BE-NEXT: xxmfacc acc0
373 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
374 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
375 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
376 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
379 %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
380 %1 = bitcast i8* %resp to <512 x i1>*
381 store <512 x i1> %0, <512 x i1>* %1, align 64
386 declare <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8>, <16 x i8>, i32, i32, i32)
; test8: loads a <512 x i1> accumulator through %vqp, passes it with %vc (as
; both vector operands) and three zero i32 operands to
; llvm.ppc.mma.pmxvi8ger4pp, and stores the result through %resp (align 64).
; %vpp is not used in the visible body.
388 define void @test8(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
389 ; CHECK-LABEL: test8:
390 ; CHECK: # %bb.0: # %entry
391 ; CHECK-NEXT: lxv vs1, 32(r3)
392 ; CHECK-NEXT: lxv vs0, 48(r3)
393 ; CHECK-NEXT: lxv vs3, 0(r3)
394 ; CHECK-NEXT: lxv vs2, 16(r3)
395 ; CHECK-NEXT: xxmtacc acc0
396 ; CHECK-NEXT: pmxvi8ger4pp acc0, v2, v2, 0, 0, 0
397 ; CHECK-NEXT: xxmfacc acc0
398 ; CHECK-NEXT: stxv vs0, 48(r7)
399 ; CHECK-NEXT: stxv vs1, 32(r7)
400 ; CHECK-NEXT: stxv vs2, 16(r7)
401 ; CHECK-NEXT: stxv vs3, 0(r7)
404 ; CHECK-BE-LABEL: test8:
405 ; CHECK-BE: # %bb.0: # %entry
406 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
407 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
408 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
409 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
410 ; CHECK-BE-NEXT: xxmtacc acc0
411 ; CHECK-BE-NEXT: pmxvi8ger4pp acc0, v2, v2, 0, 0, 0
412 ; CHECK-BE-NEXT: xxmfacc acc0
413 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
414 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
415 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
416 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
419 %0 = bitcast i8* %vqp to <512 x i1>*
420 %1 = load <512 x i1>, <512 x i1>* %0, align 64
421 %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
422 %3 = bitcast i8* %resp to <512 x i1>*
423 store <512 x i1> %2, <512 x i1>* %3, align 64
428 declare <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
; test9: calls llvm.ppc.mma.xvi16ger2s with %vc as both vector operands and
; stores the <512 x i1> result through %resp (align 64). %vqp and %vpp are
; not used in the visible body.
430 define void @test9(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
431 ; CHECK-LABEL: test9:
432 ; CHECK: # %bb.0: # %entry
433 ; CHECK-NEXT: xvi16ger2s acc0, v2, v2
434 ; CHECK-NEXT: xxmfacc acc0
435 ; CHECK-NEXT: stxv vs0, 48(r7)
436 ; CHECK-NEXT: stxv vs1, 32(r7)
437 ; CHECK-NEXT: stxv vs2, 16(r7)
438 ; CHECK-NEXT: stxv vs3, 0(r7)
441 ; CHECK-BE-LABEL: test9:
442 ; CHECK-BE: # %bb.0: # %entry
443 ; CHECK-BE-NEXT: xvi16ger2s acc0, v2, v2
444 ; CHECK-BE-NEXT: xxmfacc acc0
445 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
446 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
447 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
448 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
451 %0 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> %vc, <16 x i8> %vc)
452 %1 = bitcast i8* %resp to <512 x i1>*
453 store <512 x i1> %0, <512 x i1>* %1, align 64
458 declare <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8>, <16 x i8>)
; test10: loads a <512 x i1> accumulator through %vqp, passes it together with
; %vc (as both vector operands) to llvm.ppc.mma.xvi16ger2spp, and stores the
; result through %resp (align 64). %vpp is not used in the visible body.
460 define void @test10(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
461 ; CHECK-LABEL: test10:
462 ; CHECK: # %bb.0: # %entry
463 ; CHECK-NEXT: lxv vs1, 32(r3)
464 ; CHECK-NEXT: lxv vs0, 48(r3)
465 ; CHECK-NEXT: lxv vs3, 0(r3)
466 ; CHECK-NEXT: lxv vs2, 16(r3)
467 ; CHECK-NEXT: xxmtacc acc0
468 ; CHECK-NEXT: xvi16ger2spp acc0, v2, v2
469 ; CHECK-NEXT: xxmfacc acc0
470 ; CHECK-NEXT: stxv vs0, 48(r7)
471 ; CHECK-NEXT: stxv vs1, 32(r7)
472 ; CHECK-NEXT: stxv vs2, 16(r7)
473 ; CHECK-NEXT: stxv vs3, 0(r7)
476 ; CHECK-BE-LABEL: test10:
477 ; CHECK-BE: # %bb.0: # %entry
478 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
479 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
480 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
481 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
482 ; CHECK-BE-NEXT: xxmtacc acc0
483 ; CHECK-BE-NEXT: xvi16ger2spp acc0, v2, v2
484 ; CHECK-BE-NEXT: xxmfacc acc0
485 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
486 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
487 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
488 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
491 %0 = bitcast i8* %vqp to <512 x i1>*
492 %1 = load <512 x i1>, <512 x i1>* %0, align 64
493 %2 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
494 %3 = bitcast i8* %resp to <512 x i1>*
495 store <512 x i1> %2, <512 x i1>* %3, align 64
500 declare <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1>, <16 x i8>, <16 x i8>)
; test11: calls llvm.ppc.mma.pmxvi16ger2s with %vc as both vector operands and
; all three i32 operands set to 0, then stores the <512 x i1> result through
; %resp (align 64). %vqp and %vpp are not used in the visible body.
502 define void @test11(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
503 ; CHECK-LABEL: test11:
504 ; CHECK: # %bb.0: # %entry
505 ; CHECK-NEXT: pmxvi16ger2s acc0, v2, v2, 0, 0, 0
506 ; CHECK-NEXT: xxmfacc acc0
507 ; CHECK-NEXT: stxv vs0, 48(r7)
508 ; CHECK-NEXT: stxv vs1, 32(r7)
509 ; CHECK-NEXT: stxv vs2, 16(r7)
510 ; CHECK-NEXT: stxv vs3, 0(r7)
513 ; CHECK-BE-LABEL: test11:
514 ; CHECK-BE: # %bb.0: # %entry
515 ; CHECK-BE-NEXT: pmxvi16ger2s acc0, v2, v2, 0, 0, 0
516 ; CHECK-BE-NEXT: xxmfacc acc0
517 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
518 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
519 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
520 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
523 %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
524 %1 = bitcast i8* %resp to <512 x i1>*
525 store <512 x i1> %0, <512 x i1>* %1, align 64
530 declare <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8>, <16 x i8>, i32, i32, i32)
; test12: loads a <512 x i1> accumulator through %vqp, passes it with %vc (as
; both vector operands) and three zero i32 operands to
; llvm.ppc.mma.pmxvi16ger2spp, and stores the result through %resp (align 64).
; %vpp is not used in the visible body.
532 define void @test12(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
533 ; CHECK-LABEL: test12:
534 ; CHECK: # %bb.0: # %entry
535 ; CHECK-NEXT: lxv vs1, 32(r3)
536 ; CHECK-NEXT: lxv vs0, 48(r3)
537 ; CHECK-NEXT: lxv vs3, 0(r3)
538 ; CHECK-NEXT: lxv vs2, 16(r3)
539 ; CHECK-NEXT: xxmtacc acc0
540 ; CHECK-NEXT: pmxvi16ger2spp acc0, v2, v2, 0, 0, 0
541 ; CHECK-NEXT: xxmfacc acc0
542 ; CHECK-NEXT: stxv vs0, 48(r7)
543 ; CHECK-NEXT: stxv vs1, 32(r7)
544 ; CHECK-NEXT: stxv vs2, 16(r7)
545 ; CHECK-NEXT: stxv vs3, 0(r7)
548 ; CHECK-BE-LABEL: test12:
549 ; CHECK-BE: # %bb.0: # %entry
550 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
551 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
552 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
553 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
554 ; CHECK-BE-NEXT: xxmtacc acc0
555 ; CHECK-BE-NEXT: pmxvi16ger2spp acc0, v2, v2, 0, 0, 0
556 ; CHECK-BE-NEXT: xxmfacc acc0
557 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
558 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
559 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
560 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
563 %0 = bitcast i8* %vqp to <512 x i1>*
564 %1 = load <512 x i1>, <512 x i1>* %0, align 64
565 %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
566 %3 = bitcast i8* %resp to <512 x i1>*
567 store <512 x i1> %2, <512 x i1>* %3, align 64
572 declare <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
; test13: calls llvm.ppc.mma.xvf16ger2 with %vc as both vector operands and
; stores the <512 x i1> result through %resp (align 64). %vqp and %vpp are
; not used in the visible body.
574 define void @test13(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
575 ; CHECK-LABEL: test13:
576 ; CHECK: # %bb.0: # %entry
577 ; CHECK-NEXT: xvf16ger2 acc0, v2, v2
578 ; CHECK-NEXT: xxmfacc acc0
579 ; CHECK-NEXT: stxv vs0, 48(r7)
580 ; CHECK-NEXT: stxv vs1, 32(r7)
581 ; CHECK-NEXT: stxv vs2, 16(r7)
582 ; CHECK-NEXT: stxv vs3, 0(r7)
585 ; CHECK-BE-LABEL: test13:
586 ; CHECK-BE: # %bb.0: # %entry
587 ; CHECK-BE-NEXT: xvf16ger2 acc0, v2, v2
588 ; CHECK-BE-NEXT: xxmfacc acc0
589 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
590 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
591 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
592 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
595 %0 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> %vc, <16 x i8> %vc)
596 %1 = bitcast i8* %resp to <512 x i1>*
597 store <512 x i1> %0, <512 x i1>* %1, align 64
602 declare <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8>, <16 x i8>)
; test14: loads a <512 x i1> accumulator through %vqp, passes it together with
; %vc (as both vector operands) to llvm.ppc.mma.xvf16ger2pp, and stores the
; result through %resp (align 64). %vpp is not used in the visible body.
604 define void @test14(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
605 ; CHECK-LABEL: test14:
606 ; CHECK: # %bb.0: # %entry
607 ; CHECK-NEXT: lxv vs1, 32(r3)
608 ; CHECK-NEXT: lxv vs0, 48(r3)
609 ; CHECK-NEXT: lxv vs3, 0(r3)
610 ; CHECK-NEXT: lxv vs2, 16(r3)
611 ; CHECK-NEXT: xxmtacc acc0
612 ; CHECK-NEXT: xvf16ger2pp acc0, v2, v2
613 ; CHECK-NEXT: xxmfacc acc0
614 ; CHECK-NEXT: stxv vs0, 48(r7)
615 ; CHECK-NEXT: stxv vs1, 32(r7)
616 ; CHECK-NEXT: stxv vs2, 16(r7)
617 ; CHECK-NEXT: stxv vs3, 0(r7)
620 ; CHECK-BE-LABEL: test14:
621 ; CHECK-BE: # %bb.0: # %entry
622 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
623 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
624 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
625 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
626 ; CHECK-BE-NEXT: xxmtacc acc0
627 ; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v2
628 ; CHECK-BE-NEXT: xxmfacc acc0
629 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
630 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
631 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
632 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
635 %0 = bitcast i8* %vqp to <512 x i1>*
636 %1 = load <512 x i1>, <512 x i1>* %0, align 64
637 %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
638 %3 = bitcast i8* %resp to <512 x i1>*
639 store <512 x i1> %2, <512 x i1>* %3, align 64
644 declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>)
; test15: loads a <512 x i1> accumulator through %vqp, passes it together with
; %vc (as both vector operands) to llvm.ppc.mma.xvf16ger2pn, and stores the
; result through %resp (align 64). %vpp is not used in the visible body.
646 define void @test15(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
647 ; CHECK-LABEL: test15:
648 ; CHECK: # %bb.0: # %entry
649 ; CHECK-NEXT: lxv vs1, 32(r3)
650 ; CHECK-NEXT: lxv vs0, 48(r3)
651 ; CHECK-NEXT: lxv vs3, 0(r3)
652 ; CHECK-NEXT: lxv vs2, 16(r3)
653 ; CHECK-NEXT: xxmtacc acc0
654 ; CHECK-NEXT: xvf16ger2pn acc0, v2, v2
655 ; CHECK-NEXT: xxmfacc acc0
656 ; CHECK-NEXT: stxv vs0, 48(r7)
657 ; CHECK-NEXT: stxv vs1, 32(r7)
658 ; CHECK-NEXT: stxv vs2, 16(r7)
659 ; CHECK-NEXT: stxv vs3, 0(r7)
662 ; CHECK-BE-LABEL: test15:
663 ; CHECK-BE: # %bb.0: # %entry
664 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
665 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
666 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
667 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
668 ; CHECK-BE-NEXT: xxmtacc acc0
669 ; CHECK-BE-NEXT: xvf16ger2pn acc0, v2, v2
670 ; CHECK-BE-NEXT: xxmfacc acc0
671 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
672 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
673 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
674 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
677 %0 = bitcast i8* %vqp to <512 x i1>*
678 %1 = load <512 x i1>, <512 x i1>* %0, align 64
679 %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
680 %3 = bitcast i8* %resp to <512 x i1>*
681 store <512 x i1> %2, <512 x i1>* %3, align 64
686 declare <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>)
; test16: loads a <512 x i1> accumulator through %vqp, passes it together with
; %vc (as both vector operands) to llvm.ppc.mma.xvf16ger2np, and stores the
; result through %resp (align 64). %vpp is not used in the visible body.
688 define void @test16(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
689 ; CHECK-LABEL: test16:
690 ; CHECK: # %bb.0: # %entry
691 ; CHECK-NEXT: lxv vs1, 32(r3)
692 ; CHECK-NEXT: lxv vs0, 48(r3)
693 ; CHECK-NEXT: lxv vs3, 0(r3)
694 ; CHECK-NEXT: lxv vs2, 16(r3)
695 ; CHECK-NEXT: xxmtacc acc0
696 ; CHECK-NEXT: xvf16ger2np acc0, v2, v2
697 ; CHECK-NEXT: xxmfacc acc0
698 ; CHECK-NEXT: stxv vs0, 48(r7)
699 ; CHECK-NEXT: stxv vs1, 32(r7)
700 ; CHECK-NEXT: stxv vs2, 16(r7)
701 ; CHECK-NEXT: stxv vs3, 0(r7)
704 ; CHECK-BE-LABEL: test16:
705 ; CHECK-BE: # %bb.0: # %entry
706 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
707 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
708 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
709 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
710 ; CHECK-BE-NEXT: xxmtacc acc0
711 ; CHECK-BE-NEXT: xvf16ger2np acc0, v2, v2
712 ; CHECK-BE-NEXT: xxmfacc acc0
713 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
714 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
715 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
716 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
719 %0 = bitcast i8* %vqp to <512 x i1>*
720 %1 = load <512 x i1>, <512 x i1>* %0, align 64
721 %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
722 %3 = bitcast i8* %resp to <512 x i1>*
723 store <512 x i1> %2, <512 x i1>* %3, align 64
728 declare <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>)
; test17: loads a <512 x i1> accumulator through %vqp, passes it together with
; %vc (as both vector operands) to llvm.ppc.mma.xvf16ger2nn, and stores the
; result through %resp (align 64). %vpp is not used in the visible body.
730 define void @test17(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
731 ; CHECK-LABEL: test17:
732 ; CHECK: # %bb.0: # %entry
733 ; CHECK-NEXT: lxv vs1, 32(r3)
734 ; CHECK-NEXT: lxv vs0, 48(r3)
735 ; CHECK-NEXT: lxv vs3, 0(r3)
736 ; CHECK-NEXT: lxv vs2, 16(r3)
737 ; CHECK-NEXT: xxmtacc acc0
738 ; CHECK-NEXT: xvf16ger2nn acc0, v2, v2
739 ; CHECK-NEXT: xxmfacc acc0
740 ; CHECK-NEXT: stxv vs0, 48(r7)
741 ; CHECK-NEXT: stxv vs1, 32(r7)
742 ; CHECK-NEXT: stxv vs2, 16(r7)
743 ; CHECK-NEXT: stxv vs3, 0(r7)
746 ; CHECK-BE-LABEL: test17:
747 ; CHECK-BE: # %bb.0: # %entry
748 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
749 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
750 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
751 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
752 ; CHECK-BE-NEXT: xxmtacc acc0
753 ; CHECK-BE-NEXT: xvf16ger2nn acc0, v2, v2
754 ; CHECK-BE-NEXT: xxmfacc acc0
755 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
756 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
757 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
758 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
761 %0 = bitcast i8* %vqp to <512 x i1>*
762 %1 = load <512 x i1>, <512 x i1>* %0, align 64
763 %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
764 %3 = bitcast i8* %resp to <512 x i1>*
765 store <512 x i1> %2, <512 x i1>* %3, align 64
770 declare <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1>, <16 x i8>, <16 x i8>)
; test18: calls llvm.ppc.mma.pmxvf16ger2 with %vc as both vector operands and
; all three i32 operands set to 0, then stores the <512 x i1> result through
; %resp (align 64). %vqp and %vpp are not used in the visible body.
772 define void @test18(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
773 ; CHECK-LABEL: test18:
774 ; CHECK: # %bb.0: # %entry
775 ; CHECK-NEXT: pmxvf16ger2 acc0, v2, v2, 0, 0, 0
776 ; CHECK-NEXT: xxmfacc acc0
777 ; CHECK-NEXT: stxv vs0, 48(r7)
778 ; CHECK-NEXT: stxv vs1, 32(r7)
779 ; CHECK-NEXT: stxv vs2, 16(r7)
780 ; CHECK-NEXT: stxv vs3, 0(r7)
783 ; CHECK-BE-LABEL: test18:
784 ; CHECK-BE: # %bb.0: # %entry
785 ; CHECK-BE-NEXT: pmxvf16ger2 acc0, v2, v2, 0, 0, 0
786 ; CHECK-BE-NEXT: xxmfacc acc0
787 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
788 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
789 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
790 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
793 %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
794 %1 = bitcast i8* %resp to <512 x i1>*
795 store <512 x i1> %0, <512 x i1>* %1, align 64
800 declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8>, <16 x i8>, i32, i32, i32)
; test19: loads a <512 x i1> accumulator through %vqp, passes it with %vc (as
; both vector operands) and three zero i32 operands to
; llvm.ppc.mma.pmxvf16ger2pp, and stores the result through %resp (align 64).
; %vpp is not used in the visible body.
802 define void @test19(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
803 ; CHECK-LABEL: test19:
804 ; CHECK: # %bb.0: # %entry
805 ; CHECK-NEXT: lxv vs1, 32(r3)
806 ; CHECK-NEXT: lxv vs0, 48(r3)
807 ; CHECK-NEXT: lxv vs3, 0(r3)
808 ; CHECK-NEXT: lxv vs2, 16(r3)
809 ; CHECK-NEXT: xxmtacc acc0
810 ; CHECK-NEXT: pmxvf16ger2pp acc0, v2, v2, 0, 0, 0
811 ; CHECK-NEXT: xxmfacc acc0
812 ; CHECK-NEXT: stxv vs0, 48(r7)
813 ; CHECK-NEXT: stxv vs1, 32(r7)
814 ; CHECK-NEXT: stxv vs2, 16(r7)
815 ; CHECK-NEXT: stxv vs3, 0(r7)
818 ; CHECK-BE-LABEL: test19:
819 ; CHECK-BE: # %bb.0: # %entry
820 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
821 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
822 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
823 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
824 ; CHECK-BE-NEXT: xxmtacc acc0
825 ; CHECK-BE-NEXT: pmxvf16ger2pp acc0, v2, v2, 0, 0, 0
826 ; CHECK-BE-NEXT: xxmfacc acc0
827 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
828 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
829 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
830 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
833 %0 = bitcast i8* %vqp to <512 x i1>*
834 %1 = load <512 x i1>, <512 x i1>* %0, align 64
835 %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
836 %3 = bitcast i8* %resp to <512 x i1>*
837 store <512 x i1> %2, <512 x i1>* %3, align 64
842 declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
; test20: loads a <512 x i1> accumulator through %vqp, passes it with %vc (as
; both vector operands) and three zero i32 operands to
; llvm.ppc.mma.pmxvf16ger2pn, and stores the result through %resp (align 64).
; %vpp is not used in the visible body.
844 define void @test20(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
845 ; CHECK-LABEL: test20:
846 ; CHECK: # %bb.0: # %entry
847 ; CHECK-NEXT: lxv vs1, 32(r3)
848 ; CHECK-NEXT: lxv vs0, 48(r3)
849 ; CHECK-NEXT: lxv vs3, 0(r3)
850 ; CHECK-NEXT: lxv vs2, 16(r3)
851 ; CHECK-NEXT: xxmtacc acc0
852 ; CHECK-NEXT: pmxvf16ger2pn acc0, v2, v2, 0, 0, 0
853 ; CHECK-NEXT: xxmfacc acc0
854 ; CHECK-NEXT: stxv vs0, 48(r7)
855 ; CHECK-NEXT: stxv vs1, 32(r7)
856 ; CHECK-NEXT: stxv vs2, 16(r7)
857 ; CHECK-NEXT: stxv vs3, 0(r7)
860 ; CHECK-BE-LABEL: test20:
861 ; CHECK-BE: # %bb.0: # %entry
862 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
863 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
864 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
865 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
866 ; CHECK-BE-NEXT: xxmtacc acc0
867 ; CHECK-BE-NEXT: pmxvf16ger2pn acc0, v2, v2, 0, 0, 0
868 ; CHECK-BE-NEXT: xxmfacc acc0
869 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
870 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
871 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
872 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
875 %0 = bitcast i8* %vqp to <512 x i1>*
876 %1 = load <512 x i1>, <512 x i1>* %0, align 64
877 %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
878 %3 = bitcast i8* %resp to <512 x i1>*
879 store <512 x i1> %2, <512 x i1>* %3, align 64
884 declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
; test21: loads a <512 x i1> accumulator through %vqp, passes it with %vc (as
; both vector operands) and three zero i32 operands to
; llvm.ppc.mma.pmxvf16ger2np, and stores the result through %resp (align 64).
; %vpp is not used in the visible body.
886 define void @test21(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
887 ; CHECK-LABEL: test21:
888 ; CHECK: # %bb.0: # %entry
889 ; CHECK-NEXT: lxv vs1, 32(r3)
890 ; CHECK-NEXT: lxv vs0, 48(r3)
891 ; CHECK-NEXT: lxv vs3, 0(r3)
892 ; CHECK-NEXT: lxv vs2, 16(r3)
893 ; CHECK-NEXT: xxmtacc acc0
894 ; CHECK-NEXT: pmxvf16ger2np acc0, v2, v2, 0, 0, 0
895 ; CHECK-NEXT: xxmfacc acc0
896 ; CHECK-NEXT: stxv vs0, 48(r7)
897 ; CHECK-NEXT: stxv vs1, 32(r7)
898 ; CHECK-NEXT: stxv vs2, 16(r7)
899 ; CHECK-NEXT: stxv vs3, 0(r7)
902 ; CHECK-BE-LABEL: test21:
903 ; CHECK-BE: # %bb.0: # %entry
904 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
905 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
906 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
907 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
908 ; CHECK-BE-NEXT: xxmtacc acc0
909 ; CHECK-BE-NEXT: pmxvf16ger2np acc0, v2, v2, 0, 0, 0
910 ; CHECK-BE-NEXT: xxmfacc acc0
911 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
912 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
913 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
914 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
917 %0 = bitcast i8* %vqp to <512 x i1>*
918 %1 = load <512 x i1>, <512 x i1>* %0, align 64
919 %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
920 %3 = bitcast i8* %resp to <512 x i1>*
921 store <512 x i1> %2, <512 x i1>* %3, align 64
926 declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
; test22: loads a <512 x i1> accumulator through %vqp, passes it with %vc (as
; both vector operands) and three zero i32 operands to
; llvm.ppc.mma.pmxvf16ger2nn, and stores the result through %resp (align 64).
; %vpp is not used in the visible body.
928 define void @test22(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
929 ; CHECK-LABEL: test22:
930 ; CHECK: # %bb.0: # %entry
931 ; CHECK-NEXT: lxv vs1, 32(r3)
932 ; CHECK-NEXT: lxv vs0, 48(r3)
933 ; CHECK-NEXT: lxv vs3, 0(r3)
934 ; CHECK-NEXT: lxv vs2, 16(r3)
935 ; CHECK-NEXT: xxmtacc acc0
936 ; CHECK-NEXT: pmxvf16ger2nn acc0, v2, v2, 0, 0, 0
937 ; CHECK-NEXT: xxmfacc acc0
938 ; CHECK-NEXT: stxv vs0, 48(r7)
939 ; CHECK-NEXT: stxv vs1, 32(r7)
940 ; CHECK-NEXT: stxv vs2, 16(r7)
941 ; CHECK-NEXT: stxv vs3, 0(r7)
944 ; CHECK-BE-LABEL: test22:
945 ; CHECK-BE: # %bb.0: # %entry
946 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
947 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
948 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
949 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
950 ; CHECK-BE-NEXT: xxmtacc acc0
951 ; CHECK-BE-NEXT: pmxvf16ger2nn acc0, v2, v2, 0, 0, 0
952 ; CHECK-BE-NEXT: xxmfacc acc0
953 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
954 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
955 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
956 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
959 %0 = bitcast i8* %vqp to <512 x i1>*
960 %1 = load <512 x i1>, <512 x i1>* %0, align 64
961 %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0)
962 %3 = bitcast i8* %resp to <512 x i1>*
963 store <512 x i1> %2, <512 x i1>* %3, align 64
968 declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32, i32)
; test23: calls @llvm.ppc.mma.xvf32ger with %vc supplied as both <16 x i8>
; operands and stores the <512 x i1> result to %resp. Neither %vqp nor %vpp is
; referenced by the instructions below.
; Expected code in both runs: one xvf32ger, xxmfacc, then four stxv to r7 (no
; lxv and no xxmtacc). The little-endian and big-endian expectations differ in
; the stxv offsets and order.
970 define void @test23(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
971 ; CHECK-LABEL: test23:
972 ; CHECK: # %bb.0: # %entry
973 ; CHECK-NEXT: xvf32ger acc0, v2, v2
974 ; CHECK-NEXT: xxmfacc acc0
975 ; CHECK-NEXT: stxv vs0, 48(r7)
976 ; CHECK-NEXT: stxv vs1, 32(r7)
977 ; CHECK-NEXT: stxv vs2, 16(r7)
978 ; CHECK-NEXT: stxv vs3, 0(r7)
981 ; CHECK-BE-LABEL: test23:
982 ; CHECK-BE: # %bb.0: # %entry
983 ; CHECK-BE-NEXT: xvf32ger acc0, v2, v2
984 ; CHECK-BE-NEXT: xxmfacc acc0
985 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
986 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
987 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
988 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
991 %0 = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %vc, <16 x i8> %vc)
992 %1 = bitcast i8* %resp to <512 x i1>*
993 store <512 x i1> %0, <512 x i1>* %1, align 64
; Signature of the intrinsic called by test23.
998 declare <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8>, <16 x i8>)
; test24: loads a <512 x i1> from %vqp, passes it to @llvm.ppc.mma.xvf32gerpp
; together with %vc (supplied as both <16 x i8> operands), and stores the
; <512 x i1> result to %resp. %vpp is not referenced by the instructions below.
; Expected code in both runs: four lxv from r3, xxmtacc, one xvf32gerpp,
; xxmfacc, then four stxv to r7. The little-endian and big-endian expectations
; differ in the lxv/stxv offsets and in the order of the stores.
1000 define void @test24(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1001 ; CHECK-LABEL: test24:
1002 ; CHECK: # %bb.0: # %entry
1003 ; CHECK-NEXT: lxv vs1, 32(r3)
1004 ; CHECK-NEXT: lxv vs0, 48(r3)
1005 ; CHECK-NEXT: lxv vs3, 0(r3)
1006 ; CHECK-NEXT: lxv vs2, 16(r3)
1007 ; CHECK-NEXT: xxmtacc acc0
1008 ; CHECK-NEXT: xvf32gerpp acc0, v2, v2
1009 ; CHECK-NEXT: xxmfacc acc0
1010 ; CHECK-NEXT: stxv vs0, 48(r7)
1011 ; CHECK-NEXT: stxv vs1, 32(r7)
1012 ; CHECK-NEXT: stxv vs2, 16(r7)
1013 ; CHECK-NEXT: stxv vs3, 0(r7)
1016 ; CHECK-BE-LABEL: test24:
1017 ; CHECK-BE: # %bb.0: # %entry
1018 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1019 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1020 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1021 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1022 ; CHECK-BE-NEXT: xxmtacc acc0
1023 ; CHECK-BE-NEXT: xvf32gerpp acc0, v2, v2
1024 ; CHECK-BE-NEXT: xxmfacc acc0
1025 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1026 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1027 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1028 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1029 ; CHECK-BE-NEXT: blr
1031 %0 = bitcast i8* %vqp to <512 x i1>*
1032 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1033 %2 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
1034 %3 = bitcast i8* %resp to <512 x i1>*
1035 store <512 x i1> %2, <512 x i1>* %3, align 64
; Signature of the intrinsic called by test24.
1040 declare <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>)
; test25: loads a <512 x i1> from %vqp, passes it to @llvm.ppc.mma.xvf32gerpn
; together with %vc (supplied as both <16 x i8> operands), and stores the
; <512 x i1> result to %resp. %vpp is not referenced by the instructions below.
; Expected code in both runs: four lxv from r3, xxmtacc, one xvf32gerpn,
; xxmfacc, then four stxv to r7. The little-endian and big-endian expectations
; differ in the lxv/stxv offsets and in the order of the stores.
1042 define void @test25(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1043 ; CHECK-LABEL: test25:
1044 ; CHECK: # %bb.0: # %entry
1045 ; CHECK-NEXT: lxv vs1, 32(r3)
1046 ; CHECK-NEXT: lxv vs0, 48(r3)
1047 ; CHECK-NEXT: lxv vs3, 0(r3)
1048 ; CHECK-NEXT: lxv vs2, 16(r3)
1049 ; CHECK-NEXT: xxmtacc acc0
1050 ; CHECK-NEXT: xvf32gerpn acc0, v2, v2
1051 ; CHECK-NEXT: xxmfacc acc0
1052 ; CHECK-NEXT: stxv vs0, 48(r7)
1053 ; CHECK-NEXT: stxv vs1, 32(r7)
1054 ; CHECK-NEXT: stxv vs2, 16(r7)
1055 ; CHECK-NEXT: stxv vs3, 0(r7)
1058 ; CHECK-BE-LABEL: test25:
1059 ; CHECK-BE: # %bb.0: # %entry
1060 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1061 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1062 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1063 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1064 ; CHECK-BE-NEXT: xxmtacc acc0
1065 ; CHECK-BE-NEXT: xvf32gerpn acc0, v2, v2
1066 ; CHECK-BE-NEXT: xxmfacc acc0
1067 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1068 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1069 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1070 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1071 ; CHECK-BE-NEXT: blr
1073 %0 = bitcast i8* %vqp to <512 x i1>*
1074 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1075 %2 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
1076 %3 = bitcast i8* %resp to <512 x i1>*
1077 store <512 x i1> %2, <512 x i1>* %3, align 64
; Signature of the intrinsic called by test25.
1082 declare <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>)
; test26: loads a <512 x i1> from %vqp, passes it to @llvm.ppc.mma.xvf32gernp
; together with %vc (supplied as both <16 x i8> operands), and stores the
; <512 x i1> result to %resp. %vpp is not referenced by the instructions below.
; Expected code in both runs: four lxv from r3, xxmtacc, one xvf32gernp,
; xxmfacc, then four stxv to r7. The little-endian and big-endian expectations
; differ in the lxv/stxv offsets and in the order of the stores.
1084 define void @test26(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1085 ; CHECK-LABEL: test26:
1086 ; CHECK: # %bb.0: # %entry
1087 ; CHECK-NEXT: lxv vs1, 32(r3)
1088 ; CHECK-NEXT: lxv vs0, 48(r3)
1089 ; CHECK-NEXT: lxv vs3, 0(r3)
1090 ; CHECK-NEXT: lxv vs2, 16(r3)
1091 ; CHECK-NEXT: xxmtacc acc0
1092 ; CHECK-NEXT: xvf32gernp acc0, v2, v2
1093 ; CHECK-NEXT: xxmfacc acc0
1094 ; CHECK-NEXT: stxv vs0, 48(r7)
1095 ; CHECK-NEXT: stxv vs1, 32(r7)
1096 ; CHECK-NEXT: stxv vs2, 16(r7)
1097 ; CHECK-NEXT: stxv vs3, 0(r7)
1100 ; CHECK-BE-LABEL: test26:
1101 ; CHECK-BE: # %bb.0: # %entry
1102 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1103 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1104 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1105 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1106 ; CHECK-BE-NEXT: xxmtacc acc0
1107 ; CHECK-BE-NEXT: xvf32gernp acc0, v2, v2
1108 ; CHECK-BE-NEXT: xxmfacc acc0
1109 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1110 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1111 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1112 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1113 ; CHECK-BE-NEXT: blr
1115 %0 = bitcast i8* %vqp to <512 x i1>*
1116 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1117 %2 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
1118 %3 = bitcast i8* %resp to <512 x i1>*
1119 store <512 x i1> %2, <512 x i1>* %3, align 64
; Signature of the intrinsic called by test26.
1124 declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>)
; test27: loads a <512 x i1> from %vqp, passes it to @llvm.ppc.mma.xvf32gernn
; together with %vc (supplied as both <16 x i8> operands), and stores the
; <512 x i1> result to %resp. %vpp is not referenced by the instructions below.
; Expected code in both runs: four lxv from r3, xxmtacc, one xvf32gernn,
; xxmfacc, then four stxv to r7. The little-endian and big-endian expectations
; differ in the lxv/stxv offsets and in the order of the stores.
1126 define void @test27(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1127 ; CHECK-LABEL: test27:
1128 ; CHECK: # %bb.0: # %entry
1129 ; CHECK-NEXT: lxv vs1, 32(r3)
1130 ; CHECK-NEXT: lxv vs0, 48(r3)
1131 ; CHECK-NEXT: lxv vs3, 0(r3)
1132 ; CHECK-NEXT: lxv vs2, 16(r3)
1133 ; CHECK-NEXT: xxmtacc acc0
1134 ; CHECK-NEXT: xvf32gernn acc0, v2, v2
1135 ; CHECK-NEXT: xxmfacc acc0
1136 ; CHECK-NEXT: stxv vs0, 48(r7)
1137 ; CHECK-NEXT: stxv vs1, 32(r7)
1138 ; CHECK-NEXT: stxv vs2, 16(r7)
1139 ; CHECK-NEXT: stxv vs3, 0(r7)
1142 ; CHECK-BE-LABEL: test27:
1143 ; CHECK-BE: # %bb.0: # %entry
1144 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1145 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1146 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1147 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1148 ; CHECK-BE-NEXT: xxmtacc acc0
1149 ; CHECK-BE-NEXT: xvf32gernn acc0, v2, v2
1150 ; CHECK-BE-NEXT: xxmfacc acc0
1151 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1152 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1153 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1154 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1155 ; CHECK-BE-NEXT: blr
1157 %0 = bitcast i8* %vqp to <512 x i1>*
1158 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1159 %2 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
1160 %3 = bitcast i8* %resp to <512 x i1>*
1161 store <512 x i1> %2, <512 x i1>* %3, align 64
; Signature of the intrinsic called by test27.
1166 declare <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1>, <16 x i8>, <16 x i8>)
; test28: calls @llvm.ppc.mma.pmxvf32ger with %vc supplied as both <16 x i8>
; operands plus two i32 0 immediates, and stores the <512 x i1> result to
; %resp. Neither %vqp nor %vpp is referenced by the instructions below.
; Expected code in both runs: one pmxvf32ger, xxmfacc, then four stxv to r7
; (no lxv and no xxmtacc). The little-endian and big-endian expectations differ
; in the stxv offsets and order.
1168 define void @test28(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1169 ; CHECK-LABEL: test28:
1170 ; CHECK: # %bb.0: # %entry
1171 ; CHECK-NEXT: pmxvf32ger acc0, v2, v2, 0, 0
1172 ; CHECK-NEXT: xxmfacc acc0
1173 ; CHECK-NEXT: stxv vs0, 48(r7)
1174 ; CHECK-NEXT: stxv vs1, 32(r7)
1175 ; CHECK-NEXT: stxv vs2, 16(r7)
1176 ; CHECK-NEXT: stxv vs3, 0(r7)
1179 ; CHECK-BE-LABEL: test28:
1180 ; CHECK-BE: # %bb.0: # %entry
1181 ; CHECK-BE-NEXT: pmxvf32ger acc0, v2, v2, 0, 0
1182 ; CHECK-BE-NEXT: xxmfacc acc0
1183 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1184 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1185 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1186 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1187 ; CHECK-BE-NEXT: blr
1189 %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0)
1190 %1 = bitcast i8* %resp to <512 x i1>*
1191 store <512 x i1> %0, <512 x i1>* %1, align 64
; Signature of the intrinsic called by test28.
1196 declare <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8>, <16 x i8>, i32, i32)
; test29: loads a <512 x i1> from %vqp, passes it to @llvm.ppc.mma.pmxvf32gerpp
; together with %vc (supplied as both <16 x i8> operands) and two i32 0
; immediates, and stores the <512 x i1> result to %resp. %vpp is not referenced
; by the instructions below.
; Expected code in both runs: four lxv from r3, xxmtacc, one pmxvf32gerpp,
; xxmfacc, then four stxv to r7. The little-endian and big-endian expectations
; differ in the lxv/stxv offsets and in the order of the stores.
1198 define void @test29(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1199 ; CHECK-LABEL: test29:
1200 ; CHECK: # %bb.0: # %entry
1201 ; CHECK-NEXT: lxv vs1, 32(r3)
1202 ; CHECK-NEXT: lxv vs0, 48(r3)
1203 ; CHECK-NEXT: lxv vs3, 0(r3)
1204 ; CHECK-NEXT: lxv vs2, 16(r3)
1205 ; CHECK-NEXT: xxmtacc acc0
1206 ; CHECK-NEXT: pmxvf32gerpp acc0, v2, v2, 0, 0
1207 ; CHECK-NEXT: xxmfacc acc0
1208 ; CHECK-NEXT: stxv vs0, 48(r7)
1209 ; CHECK-NEXT: stxv vs1, 32(r7)
1210 ; CHECK-NEXT: stxv vs2, 16(r7)
1211 ; CHECK-NEXT: stxv vs3, 0(r7)
1214 ; CHECK-BE-LABEL: test29:
1215 ; CHECK-BE: # %bb.0: # %entry
1216 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1217 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1218 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1219 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1220 ; CHECK-BE-NEXT: xxmtacc acc0
1221 ; CHECK-BE-NEXT: pmxvf32gerpp acc0, v2, v2, 0, 0
1222 ; CHECK-BE-NEXT: xxmfacc acc0
1223 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1224 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1225 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1226 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1227 ; CHECK-BE-NEXT: blr
1229 %0 = bitcast i8* %vqp to <512 x i1>*
1230 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1231 %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0)
1232 %3 = bitcast i8* %resp to <512 x i1>*
1233 store <512 x i1> %2, <512 x i1>* %3, align 64
; Signature of the intrinsic called by test29.
1238 declare <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32)
; test30: loads a <512 x i1> from %vqp, passes it to @llvm.ppc.mma.pmxvf32gerpn
; together with %vc (supplied as both <16 x i8> operands) and two i32 0
; immediates, and stores the <512 x i1> result to %resp. %vpp is not referenced
; by the instructions below.
; Expected code in both runs: four lxv from r3, xxmtacc, one pmxvf32gerpn,
; xxmfacc, then four stxv to r7. The little-endian and big-endian expectations
; differ in the lxv/stxv offsets and in the order of the stores.
1240 define void @test30(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1241 ; CHECK-LABEL: test30:
1242 ; CHECK: # %bb.0: # %entry
1243 ; CHECK-NEXT: lxv vs1, 32(r3)
1244 ; CHECK-NEXT: lxv vs0, 48(r3)
1245 ; CHECK-NEXT: lxv vs3, 0(r3)
1246 ; CHECK-NEXT: lxv vs2, 16(r3)
1247 ; CHECK-NEXT: xxmtacc acc0
1248 ; CHECK-NEXT: pmxvf32gerpn acc0, v2, v2, 0, 0
1249 ; CHECK-NEXT: xxmfacc acc0
1250 ; CHECK-NEXT: stxv vs0, 48(r7)
1251 ; CHECK-NEXT: stxv vs1, 32(r7)
1252 ; CHECK-NEXT: stxv vs2, 16(r7)
1253 ; CHECK-NEXT: stxv vs3, 0(r7)
1256 ; CHECK-BE-LABEL: test30:
1257 ; CHECK-BE: # %bb.0: # %entry
1258 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1259 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1260 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1261 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1262 ; CHECK-BE-NEXT: xxmtacc acc0
1263 ; CHECK-BE-NEXT: pmxvf32gerpn acc0, v2, v2, 0, 0
1264 ; CHECK-BE-NEXT: xxmfacc acc0
1265 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1266 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1267 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1268 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1269 ; CHECK-BE-NEXT: blr
1271 %0 = bitcast i8* %vqp to <512 x i1>*
1272 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1273 %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0)
1274 %3 = bitcast i8* %resp to <512 x i1>*
1275 store <512 x i1> %2, <512 x i1>* %3, align 64
; Signature of the intrinsic called by test30.
1280 declare <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32)
; test31: loads a <512 x i1> from %vqp, passes it to @llvm.ppc.mma.pmxvf32gernp
; together with %vc (supplied as both <16 x i8> operands) and two i32 0
; immediates, and stores the <512 x i1> result to %resp. %vpp is not referenced
; by the instructions below.
; Expected code in both runs: four lxv from r3, xxmtacc, one pmxvf32gernp,
; xxmfacc, then four stxv to r7. The little-endian and big-endian expectations
; differ in the lxv/stxv offsets and in the order of the stores.
1282 define void @test31(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1283 ; CHECK-LABEL: test31:
1284 ; CHECK: # %bb.0: # %entry
1285 ; CHECK-NEXT: lxv vs1, 32(r3)
1286 ; CHECK-NEXT: lxv vs0, 48(r3)
1287 ; CHECK-NEXT: lxv vs3, 0(r3)
1288 ; CHECK-NEXT: lxv vs2, 16(r3)
1289 ; CHECK-NEXT: xxmtacc acc0
1290 ; CHECK-NEXT: pmxvf32gernp acc0, v2, v2, 0, 0
1291 ; CHECK-NEXT: xxmfacc acc0
1292 ; CHECK-NEXT: stxv vs0, 48(r7)
1293 ; CHECK-NEXT: stxv vs1, 32(r7)
1294 ; CHECK-NEXT: stxv vs2, 16(r7)
1295 ; CHECK-NEXT: stxv vs3, 0(r7)
1298 ; CHECK-BE-LABEL: test31:
1299 ; CHECK-BE: # %bb.0: # %entry
1300 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1301 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1302 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1303 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1304 ; CHECK-BE-NEXT: xxmtacc acc0
1305 ; CHECK-BE-NEXT: pmxvf32gernp acc0, v2, v2, 0, 0
1306 ; CHECK-BE-NEXT: xxmfacc acc0
1307 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1308 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1309 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1310 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1311 ; CHECK-BE-NEXT: blr
1313 %0 = bitcast i8* %vqp to <512 x i1>*
1314 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1315 %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0)
1316 %3 = bitcast i8* %resp to <512 x i1>*
1317 store <512 x i1> %2, <512 x i1>* %3, align 64
; Signature of the intrinsic called by test31.
1322 declare <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32)
; test32: loads a <512 x i1> from %vqp, passes it to @llvm.ppc.mma.pmxvf32gernn
; together with %vc (supplied as both <16 x i8> operands) and two i32 0
; immediates, and stores the <512 x i1> result to %resp. %vpp is not referenced
; by the instructions below.
; Expected code in both runs: four lxv from r3, xxmtacc, one pmxvf32gernn,
; xxmfacc, then four stxv to r7. The little-endian and big-endian expectations
; differ in the lxv/stxv offsets and in the order of the stores.
1324 define void @test32(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1325 ; CHECK-LABEL: test32:
1326 ; CHECK: # %bb.0: # %entry
1327 ; CHECK-NEXT: lxv vs1, 32(r3)
1328 ; CHECK-NEXT: lxv vs0, 48(r3)
1329 ; CHECK-NEXT: lxv vs3, 0(r3)
1330 ; CHECK-NEXT: lxv vs2, 16(r3)
1331 ; CHECK-NEXT: xxmtacc acc0
1332 ; CHECK-NEXT: pmxvf32gernn acc0, v2, v2, 0, 0
1333 ; CHECK-NEXT: xxmfacc acc0
1334 ; CHECK-NEXT: stxv vs0, 48(r7)
1335 ; CHECK-NEXT: stxv vs1, 32(r7)
1336 ; CHECK-NEXT: stxv vs2, 16(r7)
1337 ; CHECK-NEXT: stxv vs3, 0(r7)
1340 ; CHECK-BE-LABEL: test32:
1341 ; CHECK-BE: # %bb.0: # %entry
1342 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1343 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1344 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1345 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1346 ; CHECK-BE-NEXT: xxmtacc acc0
1347 ; CHECK-BE-NEXT: pmxvf32gernn acc0, v2, v2, 0, 0
1348 ; CHECK-BE-NEXT: xxmfacc acc0
1349 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1350 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1351 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1352 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1353 ; CHECK-BE-NEXT: blr
1355 %0 = bitcast i8* %vqp to <512 x i1>*
1356 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1357 %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0)
1358 %3 = bitcast i8* %resp to <512 x i1>*
1359 store <512 x i1> %2, <512 x i1>* %3, align 64
; Signature of the intrinsic called by test32.
1364 declare <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1>, <16 x i8>, <16 x i8>, i32, i32)
; test33: loads a <256 x i1> from %vpp, passes it with %vc to
; @llvm.ppc.mma.xvf64ger, and stores the <512 x i1> result to %resp. %vqp is
; not referenced by the instructions below.
; Expected code in both runs: two lxv from r4 into v5 and v4, one xvf64ger
; taking vsp36 and v2, xxmfacc, then four stxv to r7 (no xxmtacc). The
; little-endian and big-endian expectations differ in the lxv/stxv offsets and
; in the order of the stores.
1366 define void @test33(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1367 ; CHECK-LABEL: test33:
1368 ; CHECK: # %bb.0: # %entry
1369 ; CHECK-NEXT: lxv v5, 0(r4)
1370 ; CHECK-NEXT: lxv v4, 16(r4)
1371 ; CHECK-NEXT: xvf64ger acc0, vsp36, v2
1372 ; CHECK-NEXT: xxmfacc acc0
1373 ; CHECK-NEXT: stxv vs0, 48(r7)
1374 ; CHECK-NEXT: stxv vs1, 32(r7)
1375 ; CHECK-NEXT: stxv vs2, 16(r7)
1376 ; CHECK-NEXT: stxv vs3, 0(r7)
1379 ; CHECK-BE-LABEL: test33:
1380 ; CHECK-BE: # %bb.0: # %entry
1381 ; CHECK-BE-NEXT: lxv v5, 16(r4)
1382 ; CHECK-BE-NEXT: lxv v4, 0(r4)
1383 ; CHECK-BE-NEXT: xvf64ger acc0, vsp36, v2
1384 ; CHECK-BE-NEXT: xxmfacc acc0
1385 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1386 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1387 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1388 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1389 ; CHECK-BE-NEXT: blr
1391 %0 = bitcast i8* %vpp to <256 x i1>*
1392 %1 = load <256 x i1>, <256 x i1>* %0, align 32
1393 %2 = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %1, <16 x i8> %vc)
1394 %3 = bitcast i8* %resp to <512 x i1>*
1395 store <512 x i1> %2, <512 x i1>* %3, align 64
; Signature of the intrinsic called by test33.
1400 declare <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1>, <16 x i8>)
; test34: loads a <512 x i1> from %vqp and a <256 x i1> from %vpp, passes both
; with %vc to @llvm.ppc.mma.xvf64gerpp, and stores the <512 x i1> result to
; %resp.
; Expected code in both runs: four lxv from r3, xxmtacc, two lxv from r4 into
; v5 and v4, one xvf64gerpp taking vsp36 and v2, xxmfacc, then four stxv to r7.
; The little-endian and big-endian expectations differ in the lxv/stxv offsets
; and in the order of the stores.
1402 define void @test34(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1403 ; CHECK-LABEL: test34:
1404 ; CHECK: # %bb.0: # %entry
1405 ; CHECK-NEXT: lxv vs1, 32(r3)
1406 ; CHECK-NEXT: lxv vs0, 48(r3)
1407 ; CHECK-NEXT: lxv vs3, 0(r3)
1408 ; CHECK-NEXT: lxv vs2, 16(r3)
1409 ; CHECK-NEXT: xxmtacc acc0
1410 ; CHECK-NEXT: lxv v5, 0(r4)
1411 ; CHECK-NEXT: lxv v4, 16(r4)
1412 ; CHECK-NEXT: xvf64gerpp acc0, vsp36, v2
1413 ; CHECK-NEXT: xxmfacc acc0
1414 ; CHECK-NEXT: stxv vs0, 48(r7)
1415 ; CHECK-NEXT: stxv vs1, 32(r7)
1416 ; CHECK-NEXT: stxv vs2, 16(r7)
1417 ; CHECK-NEXT: stxv vs3, 0(r7)
1420 ; CHECK-BE-LABEL: test34:
1421 ; CHECK-BE: # %bb.0: # %entry
1422 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1423 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1424 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1425 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1426 ; CHECK-BE-NEXT: xxmtacc acc0
1427 ; CHECK-BE-NEXT: lxv v5, 16(r4)
1428 ; CHECK-BE-NEXT: lxv v4, 0(r4)
1429 ; CHECK-BE-NEXT: xvf64gerpp acc0, vsp36, v2
1430 ; CHECK-BE-NEXT: xxmfacc acc0
1431 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1432 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1433 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1434 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1435 ; CHECK-BE-NEXT: blr
1437 %0 = bitcast i8* %vqp to <512 x i1>*
1438 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1439 %2 = bitcast i8* %vpp to <256 x i1>*
1440 %3 = load <256 x i1>, <256 x i1>* %2, align 32
1441 %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc)
1442 %5 = bitcast i8* %resp to <512 x i1>*
1443 store <512 x i1> %4, <512 x i1>* %5, align 64
; Signature of the intrinsic called by test34.
1448 declare <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>)
; test35: loads a <512 x i1> from %vqp and a <256 x i1> from %vpp, passes both
; with %vc to @llvm.ppc.mma.xvf64gerpn, and stores the <512 x i1> result to
; %resp.
; Expected code in both runs: four lxv from r3, xxmtacc, two lxv from r4 into
; v5 and v4, one xvf64gerpn taking vsp36 and v2, xxmfacc, then four stxv to r7.
; The little-endian and big-endian expectations differ in the lxv/stxv offsets
; and in the order of the stores.
1450 define void @test35(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1451 ; CHECK-LABEL: test35:
1452 ; CHECK: # %bb.0: # %entry
1453 ; CHECK-NEXT: lxv vs1, 32(r3)
1454 ; CHECK-NEXT: lxv vs0, 48(r3)
1455 ; CHECK-NEXT: lxv vs3, 0(r3)
1456 ; CHECK-NEXT: lxv vs2, 16(r3)
1457 ; CHECK-NEXT: xxmtacc acc0
1458 ; CHECK-NEXT: lxv v5, 0(r4)
1459 ; CHECK-NEXT: lxv v4, 16(r4)
1460 ; CHECK-NEXT: xvf64gerpn acc0, vsp36, v2
1461 ; CHECK-NEXT: xxmfacc acc0
1462 ; CHECK-NEXT: stxv vs0, 48(r7)
1463 ; CHECK-NEXT: stxv vs1, 32(r7)
1464 ; CHECK-NEXT: stxv vs2, 16(r7)
1465 ; CHECK-NEXT: stxv vs3, 0(r7)
1468 ; CHECK-BE-LABEL: test35:
1469 ; CHECK-BE: # %bb.0: # %entry
1470 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1471 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1472 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1473 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1474 ; CHECK-BE-NEXT: xxmtacc acc0
1475 ; CHECK-BE-NEXT: lxv v5, 16(r4)
1476 ; CHECK-BE-NEXT: lxv v4, 0(r4)
1477 ; CHECK-BE-NEXT: xvf64gerpn acc0, vsp36, v2
1478 ; CHECK-BE-NEXT: xxmfacc acc0
1479 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1480 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1481 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1482 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1483 ; CHECK-BE-NEXT: blr
1485 %0 = bitcast i8* %vqp to <512 x i1>*
1486 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1487 %2 = bitcast i8* %vpp to <256 x i1>*
1488 %3 = load <256 x i1>, <256 x i1>* %2, align 32
1489 %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc)
1490 %5 = bitcast i8* %resp to <512 x i1>*
1491 store <512 x i1> %4, <512 x i1>* %5, align 64
; Signature of the intrinsic called by test35.
1496 declare <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1>, <256 x i1>, <16 x i8>)
; test36: loads a <512 x i1> from %vqp and a <256 x i1> from %vpp, passes both
; with %vc to @llvm.ppc.mma.xvf64gernp, and stores the <512 x i1> result to
; %resp.
; Expected code in both runs: four lxv from r3, xxmtacc, two lxv from r4 into
; v5 and v4, one xvf64gernp taking vsp36 and v2, xxmfacc, then four stxv to r7.
; The little-endian and big-endian expectations differ in the lxv/stxv offsets
; and in the order of the stores.
1498 define void @test36(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1499 ; CHECK-LABEL: test36:
1500 ; CHECK: # %bb.0: # %entry
1501 ; CHECK-NEXT: lxv vs1, 32(r3)
1502 ; CHECK-NEXT: lxv vs0, 48(r3)
1503 ; CHECK-NEXT: lxv vs3, 0(r3)
1504 ; CHECK-NEXT: lxv vs2, 16(r3)
1505 ; CHECK-NEXT: xxmtacc acc0
1506 ; CHECK-NEXT: lxv v5, 0(r4)
1507 ; CHECK-NEXT: lxv v4, 16(r4)
1508 ; CHECK-NEXT: xvf64gernp acc0, vsp36, v2
1509 ; CHECK-NEXT: xxmfacc acc0
1510 ; CHECK-NEXT: stxv vs0, 48(r7)
1511 ; CHECK-NEXT: stxv vs1, 32(r7)
1512 ; CHECK-NEXT: stxv vs2, 16(r7)
1513 ; CHECK-NEXT: stxv vs3, 0(r7)
1516 ; CHECK-BE-LABEL: test36:
1517 ; CHECK-BE: # %bb.0: # %entry
1518 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1519 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1520 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1521 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1522 ; CHECK-BE-NEXT: xxmtacc acc0
1523 ; CHECK-BE-NEXT: lxv v5, 16(r4)
1524 ; CHECK-BE-NEXT: lxv v4, 0(r4)
1525 ; CHECK-BE-NEXT: xvf64gernp acc0, vsp36, v2
1526 ; CHECK-BE-NEXT: xxmfacc acc0
1527 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1528 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1529 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1530 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1531 ; CHECK-BE-NEXT: blr
1533 %0 = bitcast i8* %vqp to <512 x i1>*
1534 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1535 %2 = bitcast i8* %vpp to <256 x i1>*
1536 %3 = load <256 x i1>, <256 x i1>* %2, align 32
1537 %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc)
1538 %5 = bitcast i8* %resp to <512 x i1>*
1539 store <512 x i1> %4, <512 x i1>* %5, align 64
; Signature of the intrinsic called by test36.
1544 declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)
; test37: loads a <512 x i1> from %vqp and a <256 x i1> from %vpp, passes both
; with %vc to @llvm.ppc.mma.xvf64gernn, and stores the <512 x i1> result to
; %resp.
; Expected code in both runs: four lxv from r3, xxmtacc, two lxv from r4 into
; v5 and v4, one xvf64gernn taking vsp36 and v2, xxmfacc, then four stxv to r7.
; The little-endian and big-endian expectations differ in the lxv/stxv offsets
; and in the order of the stores.
1546 define void @test37(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1547 ; CHECK-LABEL: test37:
1548 ; CHECK: # %bb.0: # %entry
1549 ; CHECK-NEXT: lxv vs1, 32(r3)
1550 ; CHECK-NEXT: lxv vs0, 48(r3)
1551 ; CHECK-NEXT: lxv vs3, 0(r3)
1552 ; CHECK-NEXT: lxv vs2, 16(r3)
1553 ; CHECK-NEXT: xxmtacc acc0
1554 ; CHECK-NEXT: lxv v5, 0(r4)
1555 ; CHECK-NEXT: lxv v4, 16(r4)
1556 ; CHECK-NEXT: xvf64gernn acc0, vsp36, v2
1557 ; CHECK-NEXT: xxmfacc acc0
1558 ; CHECK-NEXT: stxv vs0, 48(r7)
1559 ; CHECK-NEXT: stxv vs1, 32(r7)
1560 ; CHECK-NEXT: stxv vs2, 16(r7)
1561 ; CHECK-NEXT: stxv vs3, 0(r7)
1564 ; CHECK-BE-LABEL: test37:
1565 ; CHECK-BE: # %bb.0: # %entry
1566 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1567 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1568 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1569 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1570 ; CHECK-BE-NEXT: xxmtacc acc0
1571 ; CHECK-BE-NEXT: lxv v5, 16(r4)
1572 ; CHECK-BE-NEXT: lxv v4, 0(r4)
1573 ; CHECK-BE-NEXT: xvf64gernn acc0, vsp36, v2
1574 ; CHECK-BE-NEXT: xxmfacc acc0
1575 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1576 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1577 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1578 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1579 ; CHECK-BE-NEXT: blr
1581 %0 = bitcast i8* %vqp to <512 x i1>*
1582 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1583 %2 = bitcast i8* %vpp to <256 x i1>*
1584 %3 = load <256 x i1>, <256 x i1>* %2, align 32
1585 %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc)
1586 %5 = bitcast i8* %resp to <512 x i1>*
1587 store <512 x i1> %4, <512 x i1>* %5, align 64
; Signature of the intrinsic called by test37.
1592 declare <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>)
; test38: loads a <256 x i1> from %vpp, passes it with %vc and two i32 0
; immediates to @llvm.ppc.mma.pmxvf64ger, and stores the <512 x i1> result to
; %resp. %vqp is not referenced by the instructions below.
; Expected code in both runs: two lxv from r4 into v5 and v4, one pmxvf64ger
; taking vsp36 and v2, xxmfacc, then four stxv to r7 (no xxmtacc). The
; little-endian and big-endian expectations differ in the lxv/stxv offsets and
; in the order of the stores.
1594 define void @test38(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1595 ; CHECK-LABEL: test38:
1596 ; CHECK: # %bb.0: # %entry
1597 ; CHECK-NEXT: lxv v5, 0(r4)
1598 ; CHECK-NEXT: lxv v4, 16(r4)
1599 ; CHECK-NEXT: pmxvf64ger acc0, vsp36, v2, 0, 0
1600 ; CHECK-NEXT: xxmfacc acc0
1601 ; CHECK-NEXT: stxv vs0, 48(r7)
1602 ; CHECK-NEXT: stxv vs1, 32(r7)
1603 ; CHECK-NEXT: stxv vs2, 16(r7)
1604 ; CHECK-NEXT: stxv vs3, 0(r7)
1607 ; CHECK-BE-LABEL: test38:
1608 ; CHECK-BE: # %bb.0: # %entry
1609 ; CHECK-BE-NEXT: lxv v5, 16(r4)
1610 ; CHECK-BE-NEXT: lxv v4, 0(r4)
1611 ; CHECK-BE-NEXT: pmxvf64ger acc0, vsp36, v2, 0, 0
1612 ; CHECK-BE-NEXT: xxmfacc acc0
1613 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1614 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1615 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1616 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1617 ; CHECK-BE-NEXT: blr
1619 %0 = bitcast i8* %vpp to <256 x i1>*
1620 %1 = load <256 x i1>, <256 x i1>* %0, align 32
1621 %2 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %1, <16 x i8> %vc, i32 0, i32 0)
1622 %3 = bitcast i8* %resp to <512 x i1>*
1623 store <512 x i1> %2, <512 x i1>* %3, align 64
; Signature of the intrinsic called by test38.
1628 declare <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1>, <16 x i8>, i32, i32)
; test39: loads a <512 x i1> from %vqp and a <256 x i1> from %vpp, passes both
; with %vc and two i32 0 immediates to @llvm.ppc.mma.pmxvf64gerpp, and stores
; the <512 x i1> result to %resp.
; Expected code in both runs: four lxv from r3, xxmtacc, two lxv from r4 into
; v5 and v4, one pmxvf64gerpp taking vsp36 and v2, xxmfacc, then four stxv to
; r7. The little-endian and big-endian expectations differ in the lxv/stxv
; offsets and in the order of the stores.
1630 define void @test39(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1631 ; CHECK-LABEL: test39:
1632 ; CHECK: # %bb.0: # %entry
1633 ; CHECK-NEXT: lxv vs1, 32(r3)
1634 ; CHECK-NEXT: lxv vs0, 48(r3)
1635 ; CHECK-NEXT: lxv vs3, 0(r3)
1636 ; CHECK-NEXT: lxv vs2, 16(r3)
1637 ; CHECK-NEXT: xxmtacc acc0
1638 ; CHECK-NEXT: lxv v5, 0(r4)
1639 ; CHECK-NEXT: lxv v4, 16(r4)
1640 ; CHECK-NEXT: pmxvf64gerpp acc0, vsp36, v2, 0, 0
1641 ; CHECK-NEXT: xxmfacc acc0
1642 ; CHECK-NEXT: stxv vs0, 48(r7)
1643 ; CHECK-NEXT: stxv vs1, 32(r7)
1644 ; CHECK-NEXT: stxv vs2, 16(r7)
1645 ; CHECK-NEXT: stxv vs3, 0(r7)
1648 ; CHECK-BE-LABEL: test39:
1649 ; CHECK-BE: # %bb.0: # %entry
1650 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1651 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1652 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1653 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1654 ; CHECK-BE-NEXT: xxmtacc acc0
1655 ; CHECK-BE-NEXT: lxv v5, 16(r4)
1656 ; CHECK-BE-NEXT: lxv v4, 0(r4)
1657 ; CHECK-BE-NEXT: pmxvf64gerpp acc0, vsp36, v2, 0, 0
1658 ; CHECK-BE-NEXT: xxmfacc acc0
1659 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1660 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1661 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1662 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1663 ; CHECK-BE-NEXT: blr
1665 %0 = bitcast i8* %vqp to <512 x i1>*
1666 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1667 %2 = bitcast i8* %vpp to <256 x i1>*
1668 %3 = load <256 x i1>, <256 x i1>* %2, align 32
1669 %4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc, i32 0, i32 0)
1670 %5 = bitcast i8* %resp to <512 x i1>*
1671 store <512 x i1> %4, <512 x i1>* %5, align 64
; Signature of the intrinsic called by test39.
1676 declare <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
; test40: loads a <512 x i1> from %vqp and a <256 x i1> from %vpp, passes both
; with %vc and two i32 0 immediates to @llvm.ppc.mma.pmxvf64gerpn, and stores
; the <512 x i1> result to %resp.
; Expected code in both runs: four lxv from r3, xxmtacc, two lxv from r4 into
; v5 and v4, one pmxvf64gerpn taking vsp36 and v2, xxmfacc, then four stxv to
; r7. The little-endian and big-endian expectations differ in the lxv/stxv
; offsets and in the order of the stores.
1678 define void @test40(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1679 ; CHECK-LABEL: test40:
1680 ; CHECK: # %bb.0: # %entry
1681 ; CHECK-NEXT: lxv vs1, 32(r3)
1682 ; CHECK-NEXT: lxv vs0, 48(r3)
1683 ; CHECK-NEXT: lxv vs3, 0(r3)
1684 ; CHECK-NEXT: lxv vs2, 16(r3)
1685 ; CHECK-NEXT: xxmtacc acc0
1686 ; CHECK-NEXT: lxv v5, 0(r4)
1687 ; CHECK-NEXT: lxv v4, 16(r4)
1688 ; CHECK-NEXT: pmxvf64gerpn acc0, vsp36, v2, 0, 0
1689 ; CHECK-NEXT: xxmfacc acc0
1690 ; CHECK-NEXT: stxv vs0, 48(r7)
1691 ; CHECK-NEXT: stxv vs1, 32(r7)
1692 ; CHECK-NEXT: stxv vs2, 16(r7)
1693 ; CHECK-NEXT: stxv vs3, 0(r7)
1696 ; CHECK-BE-LABEL: test40:
1697 ; CHECK-BE: # %bb.0: # %entry
1698 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1699 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1700 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1701 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1702 ; CHECK-BE-NEXT: xxmtacc acc0
1703 ; CHECK-BE-NEXT: lxv v5, 16(r4)
1704 ; CHECK-BE-NEXT: lxv v4, 0(r4)
1705 ; CHECK-BE-NEXT: pmxvf64gerpn acc0, vsp36, v2, 0, 0
1706 ; CHECK-BE-NEXT: xxmfacc acc0
1707 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1708 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1709 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1710 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1711 ; CHECK-BE-NEXT: blr
1713 %0 = bitcast i8* %vqp to <512 x i1>*
1714 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1715 %2 = bitcast i8* %vpp to <256 x i1>*
1716 %3 = load <256 x i1>, <256 x i1>* %2, align 32
1717 %4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc, i32 0, i32 0)
1718 %5 = bitcast i8* %resp to <512 x i1>*
1719 store <512 x i1> %4, <512 x i1>* %5, align 64
; Signature of the intrinsic called by test40.
1724 declare <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
; test41: loads a <512 x i1> from %vqp and a <256 x i1> from %vpp, passes both
; with %vc and two i32 0 immediates to @llvm.ppc.mma.pmxvf64gernp, and stores
; the <512 x i1> result to %resp.
; Expected code in both runs: four lxv from r3, xxmtacc, two lxv from r4 into
; v5 and v4, one pmxvf64gernp taking vsp36 and v2, xxmfacc, then four stxv to
; r7. The little-endian and big-endian expectations differ in the lxv/stxv
; offsets and in the order of the stores.
1726 define void @test41(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1727 ; CHECK-LABEL: test41:
1728 ; CHECK: # %bb.0: # %entry
1729 ; CHECK-NEXT: lxv vs1, 32(r3)
1730 ; CHECK-NEXT: lxv vs0, 48(r3)
1731 ; CHECK-NEXT: lxv vs3, 0(r3)
1732 ; CHECK-NEXT: lxv vs2, 16(r3)
1733 ; CHECK-NEXT: xxmtacc acc0
1734 ; CHECK-NEXT: lxv v5, 0(r4)
1735 ; CHECK-NEXT: lxv v4, 16(r4)
1736 ; CHECK-NEXT: pmxvf64gernp acc0, vsp36, v2, 0, 0
1737 ; CHECK-NEXT: xxmfacc acc0
1738 ; CHECK-NEXT: stxv vs0, 48(r7)
1739 ; CHECK-NEXT: stxv vs1, 32(r7)
1740 ; CHECK-NEXT: stxv vs2, 16(r7)
1741 ; CHECK-NEXT: stxv vs3, 0(r7)
1744 ; CHECK-BE-LABEL: test41:
1745 ; CHECK-BE: # %bb.0: # %entry
1746 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1747 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1748 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1749 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1750 ; CHECK-BE-NEXT: xxmtacc acc0
1751 ; CHECK-BE-NEXT: lxv v5, 16(r4)
1752 ; CHECK-BE-NEXT: lxv v4, 0(r4)
1753 ; CHECK-BE-NEXT: pmxvf64gernp acc0, vsp36, v2, 0, 0
1754 ; CHECK-BE-NEXT: xxmfacc acc0
1755 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1756 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1757 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1758 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1759 ; CHECK-BE-NEXT: blr
1761 %0 = bitcast i8* %vqp to <512 x i1>*
1762 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1763 %2 = bitcast i8* %vpp to <256 x i1>*
1764 %3 = load <256 x i1>, <256 x i1>* %2, align 32
1765 %4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc, i32 0, i32 0)
1766 %5 = bitcast i8* %resp to <512 x i1>*
1767 store <512 x i1> %4, <512 x i1>* %5, align 64
; Signature of the intrinsic called by test41.
1772 declare <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
; test42: loads a <512 x i1> from %vqp and a <256 x i1> from %vpp, passes both
; with %vc and two i32 0 immediates to @llvm.ppc.mma.pmxvf64gernn, and stores
; the <512 x i1> result to %resp.
; Expected code in both runs: four lxv from r3, xxmtacc, two lxv from r4 into
; v5 and v4, one pmxvf64gernn taking vsp36 and v2, xxmfacc, then four stxv to
; r7. The little-endian and big-endian expectations differ in the lxv/stxv
; offsets and in the order of the stores.
1774 define void @test42(i8* %vqp, i8* %vpp, <16 x i8> %vc, i8* %resp) {
1775 ; CHECK-LABEL: test42:
1776 ; CHECK: # %bb.0: # %entry
1777 ; CHECK-NEXT: lxv vs1, 32(r3)
1778 ; CHECK-NEXT: lxv vs0, 48(r3)
1779 ; CHECK-NEXT: lxv vs3, 0(r3)
1780 ; CHECK-NEXT: lxv vs2, 16(r3)
1781 ; CHECK-NEXT: xxmtacc acc0
1782 ; CHECK-NEXT: lxv v5, 0(r4)
1783 ; CHECK-NEXT: lxv v4, 16(r4)
1784 ; CHECK-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0
1785 ; CHECK-NEXT: xxmfacc acc0
1786 ; CHECK-NEXT: stxv vs0, 48(r7)
1787 ; CHECK-NEXT: stxv vs1, 32(r7)
1788 ; CHECK-NEXT: stxv vs2, 16(r7)
1789 ; CHECK-NEXT: stxv vs3, 0(r7)
1792 ; CHECK-BE-LABEL: test42:
1793 ; CHECK-BE: # %bb.0: # %entry
1794 ; CHECK-BE-NEXT: lxv vs1, 16(r3)
1795 ; CHECK-BE-NEXT: lxv vs0, 0(r3)
1796 ; CHECK-BE-NEXT: lxv vs3, 48(r3)
1797 ; CHECK-BE-NEXT: lxv vs2, 32(r3)
1798 ; CHECK-BE-NEXT: xxmtacc acc0
1799 ; CHECK-BE-NEXT: lxv v5, 16(r4)
1800 ; CHECK-BE-NEXT: lxv v4, 0(r4)
1801 ; CHECK-BE-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0
1802 ; CHECK-BE-NEXT: xxmfacc acc0
1803 ; CHECK-BE-NEXT: stxv vs1, 16(r7)
1804 ; CHECK-BE-NEXT: stxv vs0, 0(r7)
1805 ; CHECK-BE-NEXT: stxv vs3, 48(r7)
1806 ; CHECK-BE-NEXT: stxv vs2, 32(r7)
1807 ; CHECK-BE-NEXT: blr
1809 %0 = bitcast i8* %vqp to <512 x i1>*
1810 %1 = load <512 x i1>, <512 x i1>* %0, align 64
1811 %2 = bitcast i8* %vpp to <256 x i1>*
1812 %3 = load <256 x i1>, <256 x i1>* %2, align 32
1813 %4 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc, i32 0, i32 0)
1814 %5 = bitcast i8* %resp to <512 x i1>*
1815 store <512 x i1> %4, <512 x i1>* %5, align 64
; Signature of the intrinsic called by test42.
1820 declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)