Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGen / PowerPC / builtins-ppc-pair-mma.c
blob3922513e22469a91ecd6352ceb20c7ff124be272
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
3 // RUN: -emit-llvm %s -o - | FileCheck %s
4 // RUN: %clang_cc1 -O3 -triple powerpc64-unknown-unknown -target-cpu pwr10 \
5 // RUN: -emit-llvm %s -o - | FileCheck %s
7 // CHECK-LABEL: @test1(
8 // CHECK-NEXT: entry:
9 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], <16 x i8> [[VC]], <16 x i8> [[VC]])
10 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2:![0-9]+]]
11 // CHECK-NEXT: ret void
// test1: calls __builtin_mma_assemble_acc with four copies of vc to fill the
// local __vector_quad `res`, then stores `res` through resp.
// The loads into `vq` and `vp` are never read afterwards; the CHECK lines
// above expect only the assemble.acc intrinsic call and the store.
13 void test1(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
14 __vector_quad vq = *((__vector_quad *)vqp);
15 __vector_pair vp = *((__vector_pair *)vpp);
16 __vector_quad res;
17 __builtin_mma_assemble_acc(&res, vc, vc, vc, vc);
18 *((__vector_quad *)resp) = res;
21 // CHECK-LABEL: @test2(
22 // CHECK-NEXT: entry:
23 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64
24 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP0]])
25 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP1]], 0
26 // CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[RESP:%.*]], align 16
27 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP1]], 1
28 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RESP]], i64 1
29 // CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 16
30 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP1]], 2
31 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RESP]], i64 2
32 // CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP6]], align 16
33 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP1]], 3
34 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RESP]], i64 3
35 // CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 16
36 // CHECK-NEXT: ret void
// test2: calls __builtin_mma_disassemble_acc to split the __vector_quad at vqp
// into resp. The CHECK lines above expect four <16 x i8> stores at resp[0..3].
// vpp and vc are not used by this test.
38 void test2(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
39 __builtin_mma_disassemble_acc(resp, (__vector_quad*)vqp);
42 // CHECK-LABEL: @test3(
43 // CHECK-NEXT: entry:
44 // CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
45 // CHECK-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]]
46 // CHECK-NEXT: ret void
// test3: calls __builtin_vsx_assemble_pair with two copies of vc to fill the
// local __vector_pair `res`, then stores `res` through resp.
// The loads into `vq` and `vp` are never read afterwards; the CHECK lines
// above expect only the assemble.pair intrinsic call and the store.
48 void test3(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
49 __vector_quad vq = *((__vector_quad *)vqp);
50 __vector_pair vp = *((__vector_pair *)vpp);
51 __vector_pair res;
52 __builtin_vsx_assemble_pair(&res, vc, vc);
53 *((__vector_pair *)resp) = res;
56 // CHECK-LABEL: @test4(
57 // CHECK-NEXT: entry:
58 // CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32
59 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[TMP0]])
60 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 0
61 // CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[RESP:%.*]], align 16
62 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 1
63 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RESP]], i64 1
64 // CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 16
65 // CHECK-NEXT: ret void
// test4: calls __builtin_vsx_disassemble_pair to split the __vector_pair at vpp
// into resp. The CHECK lines above expect two <16 x i8> stores at resp[0..1].
// vqp and vc are not used by this test.
67 void test4(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
68 __builtin_vsx_disassemble_pair(resp, (__vector_pair*)vpp);
71 // CHECK-LABEL: @test5(
72 // CHECK-NEXT: entry:
73 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
74 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> [[TMP0]])
75 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
76 // CHECK-NEXT: ret void
// test5: loads a __vector_quad from vqp, applies __builtin_mma_xxmtacc to it
// in place, and stores the result through resp.
// `vp` is loaded but never read afterwards.
78 void test5(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
79 __vector_quad vq = *((__vector_quad *)vqp);
80 __vector_pair vp = *((__vector_pair *)vpp);
81 __builtin_mma_xxmtacc(&vq);
82 *((__vector_quad *)resp) = vq;
85 // CHECK-LABEL: @test6(
86 // CHECK-NEXT: entry:
87 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
88 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> [[TMP0]])
89 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
90 // CHECK-NEXT: ret void
// test6: loads a __vector_quad from vqp, applies __builtin_mma_xxmfacc to it
// in place, and stores the result through resp.
// `vp` is loaded but never read afterwards.
92 void test6(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
93 __vector_quad vq = *((__vector_quad *)vqp);
94 __vector_pair vp = *((__vector_pair *)vpp);
95 __builtin_mma_xxmfacc(&vq);
96 *((__vector_quad *)resp) = vq;
99 // CHECK-LABEL: @test7(
100 // CHECK-NEXT: entry:
101 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
102 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
103 // CHECK-NEXT: ret void
// test7: calls __builtin_mma_xxsetaccz on `vq` and stores the result through resp.
// The CHECK lines above expect an argument-less xxsetaccz intrinsic call and
// no load from vqp, so the initial value of `vq` is not consumed.
// `vp` is loaded but never read afterwards.
105 void test7(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
106 __vector_quad vq = *((__vector_quad *)vqp);
107 __vector_pair vp = *((__vector_pair *)vpp);
108 __builtin_mma_xxsetaccz(&vq);
109 *((__vector_quad *)resp) = vq;
112 // CHECK-LABEL: @test8(
113 // CHECK-NEXT: entry:
114 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
115 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
116 // CHECK-NEXT: ret void
// test8: calls __builtin_mma_xvi4ger8(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the intrinsic to take only the two vc operands,
// with no load from vqp. `vp` is loaded but never read afterwards.
118 void test8(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
119 __vector_quad vq = *((__vector_quad *)vqp);
120 __vector_pair vp = *((__vector_pair *)vpp);
121 __builtin_mma_xvi4ger8(&vq, vc, vc);
122 *((__vector_quad *)resp) = vq;
125 // CHECK-LABEL: @test9(
126 // CHECK-NEXT: entry:
127 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
128 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
129 // CHECK-NEXT: ret void
// test9: calls __builtin_mma_xvi8ger4(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the intrinsic to take only the two vc operands,
// with no load from vqp. `vp` is loaded but never read afterwards.
131 void test9(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
132 __vector_quad vq = *((__vector_quad *)vqp);
133 __vector_pair vp = *((__vector_pair *)vpp);
134 __builtin_mma_xvi8ger4(&vq, vc, vc);
135 *((__vector_quad *)resp) = vq;
138 // CHECK-LABEL: @test10(
139 // CHECK-NEXT: entry:
140 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
141 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
142 // CHECK-NEXT: ret void
// test10: calls __builtin_mma_xvi16ger2(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the intrinsic to take only the two vc operands,
// with no load from vqp. `vp` is loaded but never read afterwards.
144 void test10(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
145 __vector_quad vq = *((__vector_quad *)vqp);
146 __vector_pair vp = *((__vector_pair *)vpp);
147 __builtin_mma_xvi16ger2(&vq, vc, vc);
148 *((__vector_quad *)resp) = vq;
151 // CHECK-LABEL: @test11(
152 // CHECK-NEXT: entry:
153 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
154 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
155 // CHECK-NEXT: ret void
// test11: calls __builtin_mma_xvi16ger2s(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the intrinsic to take only the two vc operands,
// with no load from vqp. `vp` is loaded but never read afterwards.
157 void test11(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
158 __vector_quad vq = *((__vector_quad *)vqp);
159 __vector_pair vp = *((__vector_pair *)vpp);
160 __builtin_mma_xvi16ger2s(&vq, vc, vc);
161 *((__vector_quad *)resp) = vq;
164 // CHECK-LABEL: @test12(
165 // CHECK-NEXT: entry:
166 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
167 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
168 // CHECK-NEXT: ret void
// test12: calls __builtin_mma_xvf16ger2(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the intrinsic to take only the two vc operands,
// with no load from vqp. `vp` is loaded but never read afterwards.
170 void test12(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
171 __vector_quad vq = *((__vector_quad *)vqp);
172 __vector_pair vp = *((__vector_pair *)vpp);
173 __builtin_mma_xvf16ger2(&vq, vc, vc);
174 *((__vector_quad *)resp) = vq;
177 // CHECK-LABEL: @test13(
178 // CHECK-NEXT: entry:
179 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
180 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
181 // CHECK-NEXT: ret void
// test13: calls __builtin_mma_xvf32ger(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the intrinsic to take only the two vc operands,
// with no load from vqp. `vp` is loaded but never read afterwards.
183 void test13(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
184 __vector_quad vq = *((__vector_quad *)vqp);
185 __vector_pair vp = *((__vector_pair *)vpp);
186 __builtin_mma_xvf32ger(&vq, vc, vc);
187 *((__vector_quad *)resp) = vq;
190 // CHECK-LABEL: @test14(
191 // CHECK-NEXT: entry:
192 // CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
193 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]])
194 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
195 // CHECK-NEXT: ret void
// test14: calls __builtin_mma_xvf64ger(&vq, vp, vc) and stores `vq` through resp.
// Here `vp` (loaded from vpp) is an operand of the builtin. The CHECK lines
// above expect a load from vpp but none from vqp.
197 void test14(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
198 __vector_quad vq = *((__vector_quad *)vqp);
199 __vector_pair vp = *((__vector_pair *)vpp);
200 __builtin_mma_xvf64ger(&vq, vp, vc);
201 *((__vector_quad *)resp) = vq;
204 // CHECK-LABEL: @test15(
205 // CHECK-NEXT: entry:
206 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
207 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
208 // CHECK-NEXT: ret void
// test15: calls __builtin_mma_pmxvi4ger8(&vq, vc, vc, 0, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the three literal zeros to appear
// as i32 0 operands and no load from vqp. `vp` is loaded but never read.
210 void test15(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
211 __vector_quad vq = *((__vector_quad *)vqp);
212 __vector_pair vp = *((__vector_pair *)vpp);
213 __builtin_mma_pmxvi4ger8(&vq, vc, vc, 0, 0, 0);
214 *((__vector_quad *)resp) = vq;
217 // CHECK-LABEL: @test16(
218 // CHECK-NEXT: entry:
219 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
220 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
221 // CHECK-NEXT: ret void
// test16: calls __builtin_mma_pmxvi8ger4(&vq, vc, vc, 0, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the three literal zeros to appear
// as i32 0 operands and no load from vqp. `vp` is loaded but never read.
223 void test16(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
224 __vector_quad vq = *((__vector_quad *)vqp);
225 __vector_pair vp = *((__vector_pair *)vpp);
226 __builtin_mma_pmxvi8ger4(&vq, vc, vc, 0, 0, 0);
227 *((__vector_quad *)resp) = vq;
230 // CHECK-LABEL: @test17(
231 // CHECK-NEXT: entry:
232 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
233 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
234 // CHECK-NEXT: ret void
// test17: calls __builtin_mma_pmxvi16ger2(&vq, vc, vc, 0, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the three literal zeros to appear
// as i32 0 operands and no load from vqp. `vp` is loaded but never read.
236 void test17(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
237 __vector_quad vq = *((__vector_quad *)vqp);
238 __vector_pair vp = *((__vector_pair *)vpp);
239 __builtin_mma_pmxvi16ger2(&vq, vc, vc, 0, 0, 0);
240 *((__vector_quad *)resp) = vq;
243 // CHECK-LABEL: @test18(
244 // CHECK-NEXT: entry:
245 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
246 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
247 // CHECK-NEXT: ret void
// test18: calls __builtin_mma_pmxvi16ger2s(&vq, vc, vc, 0, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the three literal zeros to appear
// as i32 0 operands and no load from vqp. `vp` is loaded but never read.
249 void test18(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
250 __vector_quad vq = *((__vector_quad *)vqp);
251 __vector_pair vp = *((__vector_pair *)vpp);
252 __builtin_mma_pmxvi16ger2s(&vq, vc, vc, 0, 0, 0);
253 *((__vector_quad *)resp) = vq;
256 // CHECK-LABEL: @test19(
257 // CHECK-NEXT: entry:
258 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
259 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
260 // CHECK-NEXT: ret void
// test19: calls __builtin_mma_pmxvf16ger2(&vq, vc, vc, 0, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the three literal zeros to appear
// as i32 0 operands and no load from vqp. `vp` is loaded but never read.
262 void test19(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
263 __vector_quad vq = *((__vector_quad *)vqp);
264 __vector_pair vp = *((__vector_pair *)vpp);
265 __builtin_mma_pmxvf16ger2(&vq, vc, vc, 0, 0, 0);
266 *((__vector_quad *)resp) = vq;
269 // CHECK-LABEL: @test20(
270 // CHECK-NEXT: entry:
271 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
272 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
273 // CHECK-NEXT: ret void
// test20: calls __builtin_mma_pmxvf32ger(&vq, vc, vc, 0, 0) and stores `vq`
// through resp. This builtin takes two trailing literal arguments, not three.
// The CHECK lines above expect no load from vqp. `vp` is loaded but never read.
275 void test20(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
276 __vector_quad vq = *((__vector_quad *)vqp);
277 __vector_pair vp = *((__vector_pair *)vpp);
278 __builtin_mma_pmxvf32ger(&vq, vc, vc, 0, 0);
279 *((__vector_quad *)resp) = vq;
282 // CHECK-LABEL: @test21(
283 // CHECK-NEXT: entry:
284 // CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
285 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
286 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
287 // CHECK-NEXT: ret void
// test21: calls __builtin_mma_pmxvf64ger(&vq, vp, vc, 0, 0) and stores `vq`
// through resp. `vp` (loaded from vpp) is an operand of the builtin. The CHECK
// lines above expect a load from vpp but none from vqp.
289 void test21(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
290 __vector_quad vq = *((__vector_quad *)vqp);
291 __vector_pair vp = *((__vector_pair *)vpp);
292 __builtin_mma_pmxvf64ger(&vq, vp, vc, 0, 0);
293 *((__vector_quad *)resp) = vq;
296 // CHECK-LABEL: @test22(
297 // CHECK-NEXT: entry:
298 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
299 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
300 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
301 // CHECK-NEXT: ret void
// test22: calls __builtin_mma_xvi4ger8pp(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the value loaded from vqp to be passed as the
// first intrinsic operand. `vp` is loaded but never read afterwards.
303 void test22(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
304 __vector_quad vq = *((__vector_quad *)vqp);
305 __vector_pair vp = *((__vector_pair *)vpp);
306 __builtin_mma_xvi4ger8pp(&vq, vc, vc);
307 *((__vector_quad *)resp) = vq;
310 // CHECK-LABEL: @test23(
311 // CHECK-NEXT: entry:
312 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
313 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
314 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
315 // CHECK-NEXT: ret void
// test23: calls __builtin_mma_xvi8ger4pp(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the value loaded from vqp to be passed as the
// first intrinsic operand. `vp` is loaded but never read afterwards.
317 void test23(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
318 __vector_quad vq = *((__vector_quad *)vqp);
319 __vector_pair vp = *((__vector_pair *)vpp);
320 __builtin_mma_xvi8ger4pp(&vq, vc, vc);
321 *((__vector_quad *)resp) = vq;
324 // CHECK-LABEL: @test24(
325 // CHECK-NEXT: entry:
326 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
327 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
328 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
329 // CHECK-NEXT: ret void
// test24: calls __builtin_mma_xvi8ger4spp(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the value loaded from vqp to be passed as the
// first intrinsic operand. `vp` is loaded but never read afterwards.
331 void test24(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
332 __vector_quad vq = *((__vector_quad *)vqp);
333 __vector_pair vp = *((__vector_pair *)vpp);
334 __builtin_mma_xvi8ger4spp(&vq, vc, vc);
335 *((__vector_quad *)resp) = vq;
338 // CHECK-LABEL: @test25(
339 // CHECK-NEXT: entry:
340 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
341 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
342 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
343 // CHECK-NEXT: ret void
// test25: calls __builtin_mma_xvi16ger2pp(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the value loaded from vqp to be passed as the
// first intrinsic operand. `vp` is loaded but never read afterwards.
345 void test25(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
346 __vector_quad vq = *((__vector_quad *)vqp);
347 __vector_pair vp = *((__vector_pair *)vpp);
348 __builtin_mma_xvi16ger2pp(&vq, vc, vc);
349 *((__vector_quad *)resp) = vq;
352 // CHECK-LABEL: @test26(
353 // CHECK-NEXT: entry:
354 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
355 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
356 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
357 // CHECK-NEXT: ret void
// test26: calls __builtin_mma_xvi16ger2spp(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the value loaded from vqp to be passed as the
// first intrinsic operand. `vp` is loaded but never read afterwards.
359 void test26(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
360 __vector_quad vq = *((__vector_quad *)vqp);
361 __vector_pair vp = *((__vector_pair *)vpp);
362 __builtin_mma_xvi16ger2spp(&vq, vc, vc);
363 *((__vector_quad *)resp) = vq;
366 // CHECK-LABEL: @test27(
367 // CHECK-NEXT: entry:
368 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
369 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
370 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
371 // CHECK-NEXT: ret void
// test27: calls __builtin_mma_pmxvi4ger8pp(&vq, vc, vc, 0, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the value loaded from vqp as the
// first intrinsic operand, followed by vc, vc and three i32 0 operands.
// `vp` is loaded but never read afterwards.
373 void test27(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
374 __vector_quad vq = *((__vector_quad *)vqp);
375 __vector_pair vp = *((__vector_pair *)vpp);
376 __builtin_mma_pmxvi4ger8pp(&vq, vc, vc, 0, 0, 0);
377 *((__vector_quad *)resp) = vq;
380 // CHECK-LABEL: @test28(
381 // CHECK-NEXT: entry:
382 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
383 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
384 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
385 // CHECK-NEXT: ret void
// test28: calls __builtin_mma_pmxvi8ger4pp(&vq, vc, vc, 0, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the value loaded from vqp as the
// first intrinsic operand, followed by vc, vc and three i32 0 operands.
// `vp` is loaded but never read afterwards.
387 void test28(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
388 __vector_quad vq = *((__vector_quad *)vqp);
389 __vector_pair vp = *((__vector_pair *)vpp);
390 __builtin_mma_pmxvi8ger4pp(&vq, vc, vc, 0, 0, 0);
391 *((__vector_quad *)resp) = vq;
394 // CHECK-LABEL: @test29(
395 // CHECK-NEXT: entry:
396 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
397 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
398 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
399 // CHECK-NEXT: ret void
// test29: calls __builtin_mma_pmxvi8ger4spp(&vq, vc, vc, 0, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the value loaded from vqp as the
// first intrinsic operand, followed by vc, vc and three i32 0 operands.
// `vp` is loaded but never read afterwards.
401 void test29(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
402 __vector_quad vq = *((__vector_quad *)vqp);
403 __vector_pair vp = *((__vector_pair *)vpp);
404 __builtin_mma_pmxvi8ger4spp(&vq, vc, vc, 0, 0, 0);
405 *((__vector_quad *)resp) = vq;
408 // CHECK-LABEL: @test30(
409 // CHECK-NEXT: entry:
410 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
411 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
412 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
413 // CHECK-NEXT: ret void
// test30: calls __builtin_mma_pmxvi16ger2pp(&vq, vc, vc, 0, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the value loaded from vqp as the
// first intrinsic operand, followed by vc, vc and three i32 0 operands.
// `vp` is loaded but never read afterwards.
415 void test30(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
416 __vector_quad vq = *((__vector_quad *)vqp);
417 __vector_pair vp = *((__vector_pair *)vpp);
418 __builtin_mma_pmxvi16ger2pp(&vq, vc, vc, 0, 0, 0);
419 *((__vector_quad *)resp) = vq;
422 // CHECK-LABEL: @test31(
423 // CHECK-NEXT: entry:
424 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
425 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
426 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
427 // CHECK-NEXT: ret void
// test31: calls __builtin_mma_pmxvi16ger2spp(&vq, vc, vc, 0, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the value loaded from vqp as the
// first intrinsic operand, followed by vc, vc and three i32 0 operands.
// `vp` is loaded but never read afterwards.
429 void test31(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
430 __vector_quad vq = *((__vector_quad *)vqp);
431 __vector_pair vp = *((__vector_pair *)vpp);
432 __builtin_mma_pmxvi16ger2spp(&vq, vc, vc, 0, 0, 0);
433 *((__vector_quad *)resp) = vq;
436 // CHECK-LABEL: @test32(
437 // CHECK-NEXT: entry:
438 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
439 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
440 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
441 // CHECK-NEXT: ret void
// test32: calls __builtin_mma_xvf16ger2pp(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the value loaded from vqp to be passed as the
// first intrinsic operand. `vp` is loaded but never read afterwards.
443 void test32(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
444 __vector_quad vq = *((__vector_quad *)vqp);
445 __vector_pair vp = *((__vector_pair *)vpp);
446 __builtin_mma_xvf16ger2pp(&vq, vc, vc);
447 *((__vector_quad *)resp) = vq;
450 // CHECK-LABEL: @test33(
451 // CHECK-NEXT: entry:
452 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
453 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
454 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
455 // CHECK-NEXT: ret void
// test33: calls __builtin_mma_xvf16ger2pn(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the value loaded from vqp to be passed as the
// first intrinsic operand. `vp` is loaded but never read afterwards.
457 void test33(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
458 __vector_quad vq = *((__vector_quad *)vqp);
459 __vector_pair vp = *((__vector_pair *)vpp);
460 __builtin_mma_xvf16ger2pn(&vq, vc, vc);
461 *((__vector_quad *)resp) = vq;
464 // CHECK-LABEL: @test34(
465 // CHECK-NEXT: entry:
466 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
467 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
468 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
469 // CHECK-NEXT: ret void
// test34: calls __builtin_mma_xvf16ger2np(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the value loaded from vqp to be passed as the
// first intrinsic operand. `vp` is loaded but never read afterwards.
471 void test34(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
472 __vector_quad vq = *((__vector_quad *)vqp);
473 __vector_pair vp = *((__vector_pair *)vpp);
474 __builtin_mma_xvf16ger2np(&vq, vc, vc);
475 *((__vector_quad *)resp) = vq;
478 // CHECK-LABEL: @test35(
479 // CHECK-NEXT: entry:
480 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
481 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
482 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
483 // CHECK-NEXT: ret void
// test35: calls __builtin_mma_xvf16ger2nn(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the value loaded from vqp to be passed as the
// first intrinsic operand. `vp` is loaded but never read afterwards.
485 void test35(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
486 __vector_quad vq = *((__vector_quad *)vqp);
487 __vector_pair vp = *((__vector_pair *)vpp);
488 __builtin_mma_xvf16ger2nn(&vq, vc, vc);
489 *((__vector_quad *)resp) = vq;
492 // CHECK-LABEL: @test36(
493 // CHECK-NEXT: entry:
494 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
495 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
496 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
497 // CHECK-NEXT: ret void
// test36: calls __builtin_mma_pmxvf16ger2pp(&vq, vc, vc, 0, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the value loaded from vqp as the
// first intrinsic operand, followed by vc, vc and three i32 0 operands.
// `vp` is loaded but never read afterwards.
499 void test36(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
500 __vector_quad vq = *((__vector_quad *)vqp);
501 __vector_pair vp = *((__vector_pair *)vpp);
502 __builtin_mma_pmxvf16ger2pp(&vq, vc, vc, 0, 0, 0);
503 *((__vector_quad *)resp) = vq;
506 // CHECK-LABEL: @test37(
507 // CHECK-NEXT: entry:
508 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
509 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
510 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
511 // CHECK-NEXT: ret void
// test37: calls __builtin_mma_pmxvf16ger2pn(&vq, vc, vc, 0, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the value loaded from vqp as the
// first intrinsic operand, followed by vc, vc and three i32 0 operands.
// `vp` is loaded but never read afterwards.
513 void test37(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
514 __vector_quad vq = *((__vector_quad *)vqp);
515 __vector_pair vp = *((__vector_pair *)vpp);
516 __builtin_mma_pmxvf16ger2pn(&vq, vc, vc, 0, 0, 0);
517 *((__vector_quad *)resp) = vq;
520 // CHECK-LABEL: @test38(
521 // CHECK-NEXT: entry:
522 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
523 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
524 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
525 // CHECK-NEXT: ret void
// test38: calls __builtin_mma_pmxvf16ger2np(&vq, vc, vc, 0, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the value loaded from vqp as the
// first intrinsic operand, followed by vc, vc and three i32 0 operands.
// `vp` is loaded but never read afterwards.
527 void test38(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
528 __vector_quad vq = *((__vector_quad *)vqp);
529 __vector_pair vp = *((__vector_pair *)vpp);
530 __builtin_mma_pmxvf16ger2np(&vq, vc, vc, 0, 0, 0);
531 *((__vector_quad *)resp) = vq;
534 // CHECK-LABEL: @test39(
535 // CHECK-NEXT: entry:
536 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
537 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
538 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
539 // CHECK-NEXT: ret void
// test39: calls __builtin_mma_pmxvf16ger2nn(&vq, vc, vc, 0, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the value loaded from vqp as the
// first intrinsic operand, followed by vc, vc and three i32 0 operands.
// `vp` is loaded but never read afterwards.
541 void test39(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
542 __vector_quad vq = *((__vector_quad *)vqp);
543 __vector_pair vp = *((__vector_pair *)vpp);
544 __builtin_mma_pmxvf16ger2nn(&vq, vc, vc, 0, 0, 0);
545 *((__vector_quad *)resp) = vq;
548 // CHECK-LABEL: @test40(
549 // CHECK-NEXT: entry:
550 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
551 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
552 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
553 // CHECK-NEXT: ret void
// test40: calls __builtin_mma_xvf32gerpp(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the value loaded from vqp to be passed as the
// first intrinsic operand. `vp` is loaded but never read afterwards.
555 void test40(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
556 __vector_quad vq = *((__vector_quad *)vqp);
557 __vector_pair vp = *((__vector_pair *)vpp);
558 __builtin_mma_xvf32gerpp(&vq, vc, vc);
559 *((__vector_quad *)resp) = vq;
562 // CHECK-LABEL: @test41(
563 // CHECK-NEXT: entry:
564 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
565 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
566 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
567 // CHECK-NEXT: ret void
// test41: calls __builtin_mma_xvf32gerpn(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the value loaded from vqp to be passed as the
// first intrinsic operand. `vp` is loaded but never read afterwards.
569 void test41(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
570 __vector_quad vq = *((__vector_quad *)vqp);
571 __vector_pair vp = *((__vector_pair *)vpp);
572 __builtin_mma_xvf32gerpn(&vq, vc, vc);
573 *((__vector_quad *)resp) = vq;
576 // CHECK-LABEL: @test42(
577 // CHECK-NEXT: entry:
578 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
579 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
580 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
581 // CHECK-NEXT: ret void
// test42: calls __builtin_mma_xvf32gernp(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the value loaded from vqp to be passed as the
// first intrinsic operand. `vp` is loaded but never read afterwards.
583 void test42(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
584 __vector_quad vq = *((__vector_quad *)vqp);
585 __vector_pair vp = *((__vector_pair *)vpp);
586 __builtin_mma_xvf32gernp(&vq, vc, vc);
587 *((__vector_quad *)resp) = vq;
590 // CHECK-LABEL: @test43(
591 // CHECK-NEXT: entry:
592 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
593 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
594 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
595 // CHECK-NEXT: ret void
// test43: calls __builtin_mma_xvf32gernn(&vq, vc, vc) and stores `vq` through resp.
// The CHECK lines above expect the value loaded from vqp to be passed as the
// first intrinsic operand. `vp` is loaded but never read afterwards.
597 void test43(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
598 __vector_quad vq = *((__vector_quad *)vqp);
599 __vector_pair vp = *((__vector_pair *)vpp);
600 __builtin_mma_xvf32gernn(&vq, vc, vc);
601 *((__vector_quad *)resp) = vq;
604 // CHECK-LABEL: @test44(
605 // CHECK-NEXT: entry:
606 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
607 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
608 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
609 // CHECK-NEXT: ret void
// test44: calls __builtin_mma_pmxvf32gerpp(&vq, vc, vc, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the value loaded from vqp as the
// first intrinsic operand, followed by vc, vc and two i32 0 operands.
// `vp` is loaded but never read afterwards.
611 void test44(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
612 __vector_quad vq = *((__vector_quad *)vqp);
613 __vector_pair vp = *((__vector_pair *)vpp);
614 __builtin_mma_pmxvf32gerpp(&vq, vc, vc, 0, 0);
615 *((__vector_quad *)resp) = vq;
618 // CHECK-LABEL: @test45(
619 // CHECK-NEXT: entry:
620 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
621 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
622 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
623 // CHECK-NEXT: ret void
// test45: calls __builtin_mma_pmxvf32gerpn(&vq, vc, vc, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the value loaded from vqp as the
// first intrinsic operand, followed by vc, vc and two i32 0 operands.
// `vp` is loaded but never read afterwards.
625 void test45(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
626 __vector_quad vq = *((__vector_quad *)vqp);
627 __vector_pair vp = *((__vector_pair *)vpp);
628 __builtin_mma_pmxvf32gerpn(&vq, vc, vc, 0, 0);
629 *((__vector_quad *)resp) = vq;
632 // CHECK-LABEL: @test46(
633 // CHECK-NEXT: entry:
634 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
635 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
636 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
637 // CHECK-NEXT: ret void
// test46: calls __builtin_mma_pmxvf32gernp(&vq, vc, vc, 0, 0) and stores `vq`
// through resp. The CHECK lines above expect the value loaded from vqp as the
// first intrinsic operand, followed by vc, vc and two i32 0 operands.
// `vp` is loaded but never read afterwards.
639 void test46(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
640 __vector_quad vq = *((__vector_quad *)vqp);
641 __vector_pair vp = *((__vector_pair *)vpp);
642 __builtin_mma_pmxvf32gernp(&vq, vc, vc, 0, 0);
643 *((__vector_quad *)resp) = vq;
646 // CHECK-LABEL: @test47(
647 // CHECK-NEXT: entry:
648 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
649 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0)
650 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
651 // CHECK-NEXT: ret void
// test47: exercises __builtin_mma_pmxvf32gernn. Reads a __vector_quad through
// vqp, passes &vq to the builtin with vc as both vector operands and 0, 0 as
// the trailing arguments, then stores vq through resp.
// The FileCheck assertions above expect exactly: a <512 x i1> load from vqp,
// one call to @llvm.ppc.mma.pmxvf32gernn, a store to resp, and ret.
653 void test47(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
654 __vector_quad vq = *((__vector_quad *)vqp);
// vp is not referenced again; the expected IR contains no load from vpp.
655 __vector_pair vp = *((__vector_pair *)vpp);
656 __builtin_mma_pmxvf32gernn(&vq, vc, vc, 0, 0);
657 *((__vector_quad *)resp) = vq;
660 // CHECK-LABEL: @test48(
661 // CHECK-NEXT: entry:
662 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
663 // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
664 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
665 // CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
666 // CHECK-NEXT: ret void
// test48: exercises __builtin_mma_xvf64gerpp. Reads a __vector_quad through
// vqp and a __vector_pair through vpp, passes &vq, vp and vc to the builtin,
// then stores vq through resp.
// The FileCheck assertions above expect both loads (<512 x i1> and
// <256 x i1>), one call to @llvm.ppc.mma.xvf64gerpp, a store to resp, and ret.
668 void test48(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
669 __vector_quad vq = *((__vector_quad *)vqp);
670 __vector_pair vp = *((__vector_pair *)vpp);
671 __builtin_mma_xvf64gerpp(&vq, vp, vc);
672 *((__vector_quad *)resp) = vq;
675 // CHECK-LABEL: @test49(
676 // CHECK-NEXT: entry:
677 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
678 // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
679 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
680 // CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
681 // CHECK-NEXT: ret void
// test49: exercises __builtin_mma_xvf64gerpn. Reads a __vector_quad through
// vqp and a __vector_pair through vpp, passes &vq, vp and vc to the builtin,
// then stores vq through resp.
// The FileCheck assertions above expect both loads (<512 x i1> and
// <256 x i1>), one call to @llvm.ppc.mma.xvf64gerpn, a store to resp, and ret.
683 void test49(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
684 __vector_quad vq = *((__vector_quad *)vqp);
685 __vector_pair vp = *((__vector_pair *)vpp);
686 __builtin_mma_xvf64gerpn(&vq, vp, vc);
687 *((__vector_quad *)resp) = vq;
690 // CHECK-LABEL: @test50(
691 // CHECK-NEXT: entry:
692 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
693 // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
694 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
695 // CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
696 // CHECK-NEXT: ret void
// test50: exercises __builtin_mma_xvf64gernp. Reads a __vector_quad through
// vqp and a __vector_pair through vpp, passes &vq, vp and vc to the builtin,
// then stores vq through resp.
// The FileCheck assertions above expect both loads (<512 x i1> and
// <256 x i1>), one call to @llvm.ppc.mma.xvf64gernp, a store to resp, and ret.
698 void test50(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
699 __vector_quad vq = *((__vector_quad *)vqp);
700 __vector_pair vp = *((__vector_pair *)vpp);
701 __builtin_mma_xvf64gernp(&vq, vp, vc);
702 *((__vector_quad *)resp) = vq;
705 // CHECK-LABEL: @test51(
706 // CHECK-NEXT: entry:
707 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
708 // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
709 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
710 // CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
711 // CHECK-NEXT: ret void
// test51: exercises __builtin_mma_xvf64gernn. Reads a __vector_quad through
// vqp and a __vector_pair through vpp, passes &vq, vp and vc to the builtin,
// then stores vq through resp.
// The FileCheck assertions above expect both loads (<512 x i1> and
// <256 x i1>), one call to @llvm.ppc.mma.xvf64gernn, a store to resp, and ret.
713 void test51(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
714 __vector_quad vq = *((__vector_quad *)vqp);
715 __vector_pair vp = *((__vector_pair *)vpp);
716 __builtin_mma_xvf64gernn(&vq, vp, vc);
717 *((__vector_quad *)resp) = vq;
720 // CHECK-LABEL: @test52(
721 // CHECK-NEXT: entry:
722 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
723 // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
724 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
725 // CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
726 // CHECK-NEXT: ret void
// test52: exercises __builtin_mma_pmxvf64gerpp. Reads a __vector_quad through
// vqp and a __vector_pair through vpp, passes &vq, vp, vc and two literal 0
// arguments to the builtin, then stores vq through resp.
// The FileCheck assertions above expect both loads, one call to
// @llvm.ppc.mma.pmxvf64gerpp ending in "i32 0, i32 0", a store to resp, and ret.
728 void test52(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
729 __vector_quad vq = *((__vector_quad *)vqp);
730 __vector_pair vp = *((__vector_pair *)vpp);
731 __builtin_mma_pmxvf64gerpp(&vq, vp, vc, 0, 0);
732 *((__vector_quad *)resp) = vq;
735 // CHECK-LABEL: @test53(
736 // CHECK-NEXT: entry:
737 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
738 // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
739 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
740 // CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
741 // CHECK-NEXT: ret void
// test53: exercises __builtin_mma_pmxvf64gerpn. Reads a __vector_quad through
// vqp and a __vector_pair through vpp, passes &vq, vp, vc and two literal 0
// arguments to the builtin, then stores vq through resp.
// The FileCheck assertions above expect both loads, one call to
// @llvm.ppc.mma.pmxvf64gerpn ending in "i32 0, i32 0", a store to resp, and ret.
743 void test53(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
744 __vector_quad vq = *((__vector_quad *)vqp);
745 __vector_pair vp = *((__vector_pair *)vpp);
746 __builtin_mma_pmxvf64gerpn(&vq, vp, vc, 0, 0);
747 *((__vector_quad *)resp) = vq;
750 // CHECK-LABEL: @test54(
751 // CHECK-NEXT: entry:
752 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
753 // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
754 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
755 // CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
756 // CHECK-NEXT: ret void
// test54: exercises __builtin_mma_pmxvf64gernp. Reads a __vector_quad through
// vqp and a __vector_pair through vpp, passes &vq, vp, vc and two literal 0
// arguments to the builtin, then stores vq through resp.
// The FileCheck assertions above expect both loads, one call to
// @llvm.ppc.mma.pmxvf64gernp ending in "i32 0, i32 0", a store to resp, and ret.
758 void test54(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
759 __vector_quad vq = *((__vector_quad *)vqp);
760 __vector_pair vp = *((__vector_pair *)vpp);
761 __builtin_mma_pmxvf64gernp(&vq, vp, vc, 0, 0);
762 *((__vector_quad *)resp) = vq;
765 // CHECK-LABEL: @test55(
766 // CHECK-NEXT: entry:
767 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
768 // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA6]]
769 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
770 // CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
771 // CHECK-NEXT: ret void
// test55: exercises __builtin_mma_pmxvf64gernn. Reads a __vector_quad through
// vqp and a __vector_pair through vpp, passes &vq, vp, vc and two literal 0
// arguments to the builtin, then stores vq through resp.
// The FileCheck assertions above expect both loads, one call to
// @llvm.ppc.mma.pmxvf64gernn ending in "i32 0, i32 0", a store to resp, and ret.
773 void test55(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
774 __vector_quad vq = *((__vector_quad *)vqp);
775 __vector_pair vp = *((__vector_pair *)vpp);
776 __builtin_mma_pmxvf64gernn(&vq, vp, vc, 0, 0);
777 *((__vector_quad *)resp) = vq;
780 // CHECK-LABEL: @test56(
781 // CHECK-NEXT: entry:
782 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
783 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
784 // CHECK-NEXT: ret void
// test56: exercises __builtin_mma_xvbf16ger2. The builtin receives &vq and vc
// as both vector operands; vq is then stored through resp.
// The FileCheck assertions above expect no loads at all: only a call to
// @llvm.ppc.mma.xvbf16ger2 taking vc twice, a store of its result to resp,
// and ret.
786 void test56(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
// Neither initial value below shows up in the expected IR (no load from vqp
// or vpp is asserted).
787 __vector_quad vq = *((__vector_quad *)vqp);
788 __vector_pair vp = *((__vector_pair *)vpp);
789 __builtin_mma_xvbf16ger2(&vq, vc, vc);
790 *((__vector_quad *)resp) = vq;
793 // CHECK-LABEL: @test57(
794 // CHECK-NEXT: entry:
795 // CHECK-NEXT: [[TMP0:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
796 // CHECK-NEXT: store <512 x i1> [[TMP0]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
797 // CHECK-NEXT: ret void
// test57: exercises __builtin_mma_pmxvbf16ger2. The builtin receives &vq, vc
// as both vector operands, and three literal 0 arguments; vq is then stored
// through resp.
// The FileCheck assertions above expect no loads at all: only a call to
// @llvm.ppc.mma.pmxvbf16ger2 (vc twice, then three i32 0), a store of its
// result to resp, and ret.
799 void test57(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
// Neither initial value below shows up in the expected IR (no load from vqp
// or vpp is asserted).
800 __vector_quad vq = *((__vector_quad *)vqp);
801 __vector_pair vp = *((__vector_pair *)vpp);
802 __builtin_mma_pmxvbf16ger2(&vq, vc, vc, 0, 0, 0);
803 *((__vector_quad *)resp) = vq;
806 // CHECK-LABEL: @test58(
807 // CHECK-NEXT: entry:
808 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
809 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
810 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
811 // CHECK-NEXT: ret void
// test58: exercises __builtin_mma_xvbf16ger2pp. Reads a __vector_quad through
// vqp, passes &vq to the builtin with vc as both vector operands, then stores
// vq through resp.
// The FileCheck assertions above expect exactly: a <512 x i1> load from vqp,
// one call to @llvm.ppc.mma.xvbf16ger2pp, a store to resp, and ret.
813 void test58(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
814 __vector_quad vq = *((__vector_quad *)vqp);
// vp is not referenced again; the expected IR contains no load from vpp.
815 __vector_pair vp = *((__vector_pair *)vpp);
816 __builtin_mma_xvbf16ger2pp(&vq, vc, vc);
817 *((__vector_quad *)resp) = vq;
820 // CHECK-LABEL: @test59(
821 // CHECK-NEXT: entry:
822 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
823 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
824 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
825 // CHECK-NEXT: ret void
// test59: exercises __builtin_mma_xvbf16ger2pn. Reads a __vector_quad through
// vqp, passes &vq to the builtin with vc as both vector operands, then stores
// vq through resp.
// The FileCheck assertions above expect exactly: a <512 x i1> load from vqp,
// one call to @llvm.ppc.mma.xvbf16ger2pn, a store to resp, and ret.
827 void test59(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
828 __vector_quad vq = *((__vector_quad *)vqp);
// vp is not referenced again; the expected IR contains no load from vpp.
829 __vector_pair vp = *((__vector_pair *)vpp);
830 __builtin_mma_xvbf16ger2pn(&vq, vc, vc);
831 *((__vector_quad *)resp) = vq;
834 // CHECK-LABEL: @test60(
835 // CHECK-NEXT: entry:
836 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
837 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
838 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
839 // CHECK-NEXT: ret void
// test60: exercises __builtin_mma_xvbf16ger2np. Reads a __vector_quad through
// vqp, passes &vq to the builtin with vc as both vector operands, then stores
// vq through resp.
// The FileCheck assertions above expect exactly: a <512 x i1> load from vqp,
// one call to @llvm.ppc.mma.xvbf16ger2np, a store to resp, and ret.
841 void test60(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
842 __vector_quad vq = *((__vector_quad *)vqp);
// vp is not referenced again; the expected IR contains no load from vpp.
843 __vector_pair vp = *((__vector_pair *)vpp);
844 __builtin_mma_xvbf16ger2np(&vq, vc, vc);
845 *((__vector_quad *)resp) = vq;
848 // CHECK-LABEL: @test61(
849 // CHECK-NEXT: entry:
850 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
851 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
852 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
853 // CHECK-NEXT: ret void
// test61: exercises __builtin_mma_xvbf16ger2nn. Reads a __vector_quad through
// vqp, passes &vq to the builtin with vc as both vector operands, then stores
// vq through resp.
// The FileCheck assertions above expect exactly: a <512 x i1> load from vqp,
// one call to @llvm.ppc.mma.xvbf16ger2nn, a store to resp, and ret.
855 void test61(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
856 __vector_quad vq = *((__vector_quad *)vqp);
// vp is not referenced again; the expected IR contains no load from vpp.
857 __vector_pair vp = *((__vector_pair *)vpp);
858 __builtin_mma_xvbf16ger2nn(&vq, vc, vc);
859 *((__vector_quad *)resp) = vq;
862 // CHECK-LABEL: @test62(
863 // CHECK-NEXT: entry:
864 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
865 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
866 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
867 // CHECK-NEXT: ret void
// test62: exercises __builtin_mma_pmxvbf16ger2pp. Reads a __vector_quad
// through vqp, passes &vq to the builtin with vc as both vector operands and
// three literal 0 arguments, then stores vq through resp.
// The FileCheck assertions above expect exactly: a <512 x i1> load from vqp,
// one call to @llvm.ppc.mma.pmxvbf16ger2pp, a store to resp, and ret.
869 void test62(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
870 __vector_quad vq = *((__vector_quad *)vqp);
// vp is not referenced again; the expected IR contains no load from vpp.
871 __vector_pair vp = *((__vector_pair *)vpp);
872 __builtin_mma_pmxvbf16ger2pp(&vq, vc, vc, 0, 0, 0);
873 *((__vector_quad *)resp) = vq;
876 // CHECK-LABEL: @test63(
877 // CHECK-NEXT: entry:
878 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
879 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
880 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
881 // CHECK-NEXT: ret void
// test63: exercises __builtin_mma_pmxvbf16ger2pn. Reads a __vector_quad
// through vqp, passes &vq to the builtin with vc as both vector operands and
// three literal 0 arguments, then stores vq through resp.
// The FileCheck assertions above expect exactly: a <512 x i1> load from vqp,
// one call to @llvm.ppc.mma.pmxvbf16ger2pn, a store to resp, and ret.
883 void test63(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
884 __vector_quad vq = *((__vector_quad *)vqp);
// vp is not referenced again; the expected IR contains no load from vpp.
885 __vector_pair vp = *((__vector_pair *)vpp);
886 __builtin_mma_pmxvbf16ger2pn(&vq, vc, vc, 0, 0, 0);
887 *((__vector_quad *)resp) = vq;
890 // CHECK-LABEL: @test64(
891 // CHECK-NEXT: entry:
892 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
893 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
894 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
895 // CHECK-NEXT: ret void
// test64: exercises __builtin_mma_pmxvbf16ger2np. Reads a __vector_quad
// through vqp, passes &vq to the builtin with vc as both vector operands and
// three literal 0 arguments, then stores vq through resp.
// The FileCheck assertions above expect exactly: a <512 x i1> load from vqp,
// one call to @llvm.ppc.mma.pmxvbf16ger2np, a store to resp, and ret.
897 void test64(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
898 __vector_quad vq = *((__vector_quad *)vqp);
// vp is not referenced again; the expected IR contains no load from vpp.
899 __vector_pair vp = *((__vector_pair *)vpp);
900 __builtin_mma_pmxvbf16ger2np(&vq, vc, vc, 0, 0, 0);
901 *((__vector_quad *)resp) = vq;
904 // CHECK-LABEL: @test65(
905 // CHECK-NEXT: entry:
906 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
907 // CHECK-NEXT: [[TMP1:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], <16 x i8> [[VC]], i32 0, i32 0, i32 0)
908 // CHECK-NEXT: store <512 x i1> [[TMP1]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
909 // CHECK-NEXT: ret void
// test65: exercises __builtin_mma_pmxvbf16ger2nn. Reads a __vector_quad
// through vqp, passes &vq to the builtin with vc as both vector operands and
// three literal 0 arguments, then stores vq through resp.
// The FileCheck assertions above expect exactly: a <512 x i1> load from vqp,
// one call to @llvm.ppc.mma.pmxvbf16ger2nn, a store to resp, and ret.
911 void test65(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
912 __vector_quad vq = *((__vector_quad *)vqp);
// vp is not referenced again; the expected IR contains no load from vpp.
913 __vector_pair vp = *((__vector_pair *)vpp);
914 __builtin_mma_pmxvbf16ger2nn(&vq, vc, vc, 0, 0, 0);
915 *((__vector_quad *)resp) = vq;
918 // CHECK-LABEL: @test66(
919 // CHECK-NEXT: entry:
920 // CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]])
921 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP0]], ptr [[VP2:%.*]])
922 // CHECK-NEXT: ret void
// test66: __builtin_vsx_lxvp / __builtin_vsx_stxvp with a literal offset of 0.
// A __vector_pair is obtained from vpp and handed straight to the store
// builtin targeting vp2.
// The FileCheck assertions above expect @llvm.ppc.vsx.lxvp and
// @llvm.ppc.vsx.stxvp to be called directly on vpp and vp2, with no
// getelementptr in between.
924 void test66(const __vector_pair *vpp, __vector_pair *vp2) {
925 __vector_pair vp = __builtin_vsx_lxvp(0L, vpp);
926 __builtin_vsx_stxvp(vp, 0L, vp2);
929 // CHECK-LABEL: @test67(
930 // CHECK-NEXT: entry:
931 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFSET:%.*]]
932 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
933 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 [[OFFSET]]
934 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
935 // CHECK-NEXT: ret void
// test67: __builtin_vsx_lxvp / __builtin_vsx_stxvp with a run-time offset.
// The same `offset` argument is passed to both builtins.
// The FileCheck assertions above expect an i8 getelementptr by `offset` on
// vpp feeding @llvm.ppc.vsx.lxvp, and another by the same `offset` on vp2
// feeding @llvm.ppc.vsx.stxvp.
937 void test67(const __vector_pair *vpp, signed long offset, __vector_pair *vp2) {
938 __vector_pair vp = __builtin_vsx_lxvp(offset, vpp);
939 __builtin_vsx_stxvp(vp, offset, vp2);
942 // CHECK-LABEL: @test68(
943 // CHECK-NEXT: entry:
944 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 18
945 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
946 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 18
947 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
948 // CHECK-NEXT: ret void
// test68: __builtin_vsx_lxvp / __builtin_vsx_stxvp with the literal offset 18.
// The FileCheck assertions above expect an i8 getelementptr of 18 on vpp
// feeding @llvm.ppc.vsx.lxvp, and an i8 getelementptr of 18 on vp2 feeding
// @llvm.ppc.vsx.stxvp.
950 void test68(const __vector_pair *vpp, __vector_pair *vp2) {
951 __vector_pair vp = __builtin_vsx_lxvp(18L, vpp);
952 __builtin_vsx_stxvp(vp, 18L, vp2);
955 // CHECK-LABEL: @test69(
956 // CHECK-NEXT: entry:
957 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 1
958 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
959 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 1
960 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
961 // CHECK-NEXT: ret void
// test69: __builtin_vsx_lxvp / __builtin_vsx_stxvp with the literal offset 1.
// The FileCheck assertions above expect an i8 getelementptr of 1 on vpp
// feeding @llvm.ppc.vsx.lxvp, and an i8 getelementptr of 1 on vp2 feeding
// @llvm.ppc.vsx.stxvp.
963 void test69(const __vector_pair *vpp, __vector_pair *vp2) {
964 __vector_pair vp = __builtin_vsx_lxvp(1L, vpp);
965 __builtin_vsx_stxvp(vp, 1L, vp2);
968 // CHECK-LABEL: @test70(
969 // CHECK-NEXT: entry:
970 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 42
971 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
972 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 42
973 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
974 // CHECK-NEXT: ret void
// test70: __builtin_vsx_lxvp / __builtin_vsx_stxvp with the literal offset 42.
// The FileCheck assertions above expect an i8 getelementptr of 42 on vpp
// feeding @llvm.ppc.vsx.lxvp, and an i8 getelementptr of 42 on vp2 feeding
// @llvm.ppc.vsx.stxvp.
976 void test70(const __vector_pair *vpp, __vector_pair *vp2) {
977 __vector_pair vp = __builtin_vsx_lxvp(42L, vpp);
978 __builtin_vsx_stxvp(vp, 42L, vp2);
981 // CHECK-LABEL: @test71(
982 // CHECK-NEXT: entry:
983 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32768
984 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
985 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 32768
986 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
987 // CHECK-NEXT: ret void
// test71: __builtin_vsx_lxvp / __builtin_vsx_stxvp with the literal offset
// 32768.
// The FileCheck assertions above expect an i8 getelementptr of 32768 on vpp
// feeding @llvm.ppc.vsx.lxvp, and an i8 getelementptr of 32768 on vp2 feeding
// @llvm.ppc.vsx.stxvp.
989 void test71(const __vector_pair *vpp, __vector_pair *vp2) {
990 __vector_pair vp = __builtin_vsx_lxvp(32768L, vpp);
991 __builtin_vsx_stxvp(vp, 32768L, vp2);
994 // CHECK-LABEL: @test72(
995 // CHECK-NEXT: entry:
996 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32799
997 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
998 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 32799
999 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
1000 // CHECK-NEXT: ret void
// test72: __builtin_vsx_lxvp / __builtin_vsx_stxvp with the literal offset
// 32799.
// The FileCheck assertions above expect an i8 getelementptr of 32799 on vpp
// feeding @llvm.ppc.vsx.lxvp, and an i8 getelementptr of 32799 on vp2 feeding
// @llvm.ppc.vsx.stxvp.
1002 void test72(const __vector_pair *vpp, __vector_pair *vp2) {
1003 __vector_pair vp = __builtin_vsx_lxvp(32799L, vpp);
1004 __builtin_vsx_stxvp(vp, 32799L, vp2);
1007 // CHECK-LABEL: @test73(
1008 // CHECK-NEXT: entry:
1009 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
1010 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 8
1011 // CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]])
1012 // CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
1013 // CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
1014 // CHECK-NEXT: ret void
// test73: feeds the result of __builtin_vsx_lxvp (literal offset 8) into
// __builtin_mma_pmxvf64gernn. The quad is read through vqp, updated by the
// builtin via &vq with vp, vc and two literal 0 arguments, and stored through
// resp.
// The FileCheck assertions above expect: a <512 x i1> load from vqp, an i8
// getelementptr of 8 on vpp, @llvm.ppc.vsx.lxvp, @llvm.ppc.mma.pmxvf64gernn,
// a store to resp, and ret.
1016 void test73(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
1017 __vector_quad vq = *((__vector_quad *)vqp);
1018 __vector_pair vp = __builtin_vsx_lxvp(8L, vpp);
1019 __builtin_mma_pmxvf64gernn(&vq, vp, vc, 0, 0);
1020 *((__vector_quad *)resp) = vq;
1023 // CHECK-LABEL: @test74(
1024 // CHECK-NEXT: entry:
1025 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
1026 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]])
1027 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
1028 // CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
1029 // CHECK-NEXT: ret void
// test74: feeds the result of __builtin_vsx_lxvp (literal offset 0) into
// __builtin_mma_xvf64gernp. The quad is read through vqp, passed to the
// builtin via &vq along with vp and vc, and stored through resp.
// The FileCheck assertions above expect: a <512 x i1> load from vqp,
// @llvm.ppc.vsx.lxvp directly on vpp (no getelementptr),
// @llvm.ppc.mma.xvf64gernp, a store to resp, and ret.
1031 void test74(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
1032 __vector_quad vq = *((__vector_quad *)vqp);
1033 __vector_pair vp = __builtin_vsx_lxvp(0L, vpp);
1034 __builtin_mma_xvf64gernp(&vq, vp, vc);
1035 *((__vector_quad *)resp) = vq;
1038 // CHECK-LABEL: @test75(
1039 // CHECK-NEXT: entry:
1040 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
1041 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFS:%.*]]
1042 // CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]])
1043 // CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC:%.*]])
1044 // CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
1045 // CHECK-NEXT: ret void
// test75: feeds the result of __builtin_vsx_lxvp (run-time offset `offs`)
// into __builtin_mma_xvf64gernp. The quad is read through vqp, passed to the
// builtin via &vq along with vp and vc, and stored through resp.
// The FileCheck assertions above expect: a <512 x i1> load from vqp, an i8
// getelementptr by `offs` on vpp, @llvm.ppc.vsx.lxvp,
// @llvm.ppc.mma.xvf64gernp, a store to resp, and ret.
1047 void test75(unsigned char *vqp, signed long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
1048 __vector_quad vq = *((__vector_quad *)vqp);
1049 __vector_pair vp = __builtin_vsx_lxvp(offs, vpp);
1050 __builtin_mma_xvf64gernp(&vq, vp, vc);
1051 *((__vector_quad *)resp) = vq;
1054 // CHECK-LABEL: @test76(
1055 // CHECK-NEXT: entry:
1056 // CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[VC:%.*]], <16 x i8> [[VC]])
1057 // CHECK-NEXT: store <256 x i1> [[TMP0]], ptr [[RESP:%.*]], align 32, !tbaa [[TBAA6]]
1058 // CHECK-NEXT: ret void
// test76: exercises __builtin_mma_assemble_pair. The builtin is given &res
// and vc twice; res is then stored through resp, which is reinterpreted as a
// __vector_pair pointer.
// The FileCheck assertions above expect only a call to
// @llvm.ppc.vsx.assemble.pair taking vc twice, a <256 x i1> store to resp,
// and ret.
1060 void test76(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
// vq and vp are not referenced again; the expected IR contains no load from
// vqp or vpp.
1061 __vector_quad vq = *((__vector_quad *)vqp);
1062 __vector_pair vp = *((__vector_pair *)vpp);
1063 __vector_pair res;
1064 __builtin_mma_assemble_pair(&res, vc, vc);
1065 *((__vector_pair *)resp) = res;
1068 // CHECK-LABEL: @test77(
1069 // CHECK-NEXT: entry:
1070 // CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32
1071 // CHECK-NEXT: [[TMP1:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[TMP0]])
1072 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 0
1073 // CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[RESP:%.*]], align 16
1074 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP1]], 1
1075 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RESP]], i64 1
1076 // CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 16
1077 // CHECK-NEXT: ret void
// test77: exercises __builtin_mma_disassemble_pair, passing resp as the
// destination and vpp (cast to __vector_pair *) as the source. The vqp and vc
// parameters are not used in the body.
// The FileCheck assertions above expect: a <256 x i1> load from vpp, a call
// to @llvm.ppc.vsx.disassemble.pair, and the two extracted <16 x i8> values
// stored at resp and at the next <16 x i8> element after resp.
1079 void test77(unsigned char *vqp, unsigned char *vpp, vector unsigned char vc, unsigned char *resp) {
1080 __builtin_mma_disassemble_pair(resp, (__vector_pair*)vpp);
1083 // CHECK-LABEL: @test78(
1084 // CHECK-NEXT: entry:
1085 // CHECK-NEXT: [[TMP0:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]])
1086 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP0]], ptr [[VP2:%.*]])
1087 // CHECK-NEXT: ret void
// test78: __builtin_mma_lxvp / __builtin_mma_stxvp with a literal offset of 0.
// A __vector_pair is obtained from vpp and handed straight to the store
// builtin targeting vp2.
// The FileCheck assertions above expect these builtins to appear as
// @llvm.ppc.vsx.lxvp and @llvm.ppc.vsx.stxvp, called directly on vpp and vp2
// with no getelementptr.
1089 void test78(const __vector_pair *vpp, __vector_pair *vp2) {
1090 __vector_pair vp = __builtin_mma_lxvp(0L, vpp);
1091 __builtin_mma_stxvp(vp, 0L, vp2);
1094 // CHECK-LABEL: @test79(
1095 // CHECK-NEXT: entry:
1096 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFSET:%.*]]
1097 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
1098 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 [[OFFSET]]
1099 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
1100 // CHECK-NEXT: ret void
// test79: __builtin_mma_lxvp / __builtin_mma_stxvp with a run-time offset.
// The same `offset` argument is passed to both builtins.
// The FileCheck assertions above expect an i8 getelementptr by `offset` on
// vpp feeding @llvm.ppc.vsx.lxvp, and another by the same `offset` on vp2
// feeding @llvm.ppc.vsx.stxvp.
1102 void test79(const __vector_pair *vpp, signed long offset, __vector_pair *vp2) {
1103 __vector_pair vp = __builtin_mma_lxvp(offset, vpp);
1104 __builtin_mma_stxvp(vp, offset, vp2);
1107 // CHECK-LABEL: @test80(
1108 // CHECK-NEXT: entry:
1109 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 18
1110 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
1111 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 18
1112 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
1113 // CHECK-NEXT: ret void
// test80: __builtin_mma_lxvp / __builtin_mma_stxvp with the literal offset 18.
// The FileCheck assertions above expect an i8 getelementptr of 18 on vpp
// feeding @llvm.ppc.vsx.lxvp, and an i8 getelementptr of 18 on vp2 feeding
// @llvm.ppc.vsx.stxvp.
1115 void test80(const __vector_pair *vpp, __vector_pair *vp2) {
1116 __vector_pair vp = __builtin_mma_lxvp(18L, vpp);
1117 __builtin_mma_stxvp(vp, 18L, vp2);
1120 // CHECK-LABEL: @test81(
1121 // CHECK-NEXT: entry:
1122 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 1
1123 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
1124 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 1
1125 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
1126 // CHECK-NEXT: ret void
// test81: __builtin_mma_lxvp / __builtin_mma_stxvp with the literal offset 1.
// The FileCheck assertions above expect an i8 getelementptr of 1 on vpp
// feeding @llvm.ppc.vsx.lxvp, and an i8 getelementptr of 1 on vp2 feeding
// @llvm.ppc.vsx.stxvp.
1128 void test81(const __vector_pair *vpp, __vector_pair *vp2) {
1129 __vector_pair vp = __builtin_mma_lxvp(1L, vpp);
1130 __builtin_mma_stxvp(vp, 1L, vp2);
1133 // CHECK-LABEL: @test82(
1134 // CHECK-NEXT: entry:
1135 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 42
1136 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
1137 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 42
1138 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
1139 // CHECK-NEXT: ret void
// test82: __builtin_mma_lxvp / __builtin_mma_stxvp with the literal offset 42.
// The FileCheck assertions above expect an i8 getelementptr of 42 on vpp
// feeding @llvm.ppc.vsx.lxvp, and an i8 getelementptr of 42 on vp2 feeding
// @llvm.ppc.vsx.stxvp.
1141 void test82(const __vector_pair *vpp, __vector_pair *vp2) {
1142 __vector_pair vp = __builtin_mma_lxvp(42L, vpp);
1143 __builtin_mma_stxvp(vp, 42L, vp2);
1146 // CHECK-LABEL: @test83(
1147 // CHECK-NEXT: entry:
1148 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32768
1149 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
1150 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 32768
1151 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
1152 // CHECK-NEXT: ret void
// test83: __builtin_mma_lxvp / __builtin_mma_stxvp with the literal offset
// 32768.
// The FileCheck assertions above expect an i8 getelementptr of 32768 on vpp
// feeding @llvm.ppc.vsx.lxvp, and an i8 getelementptr of 32768 on vp2 feeding
// @llvm.ppc.vsx.stxvp.
1154 void test83(const __vector_pair *vpp, __vector_pair *vp2) {
1155 __vector_pair vp = __builtin_mma_lxvp(32768L, vpp);
1156 __builtin_mma_stxvp(vp, 32768L, vp2);
1159 // CHECK-LABEL: @test84(
1160 // CHECK-NEXT: entry:
1161 // CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 32799
1162 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP0]])
1163 // CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[VP2:%.*]], i64 32799
1164 // CHECK-NEXT: tail call void @llvm.ppc.vsx.stxvp(<256 x i1> [[TMP1]], ptr [[TMP2]])
1165 // CHECK-NEXT: ret void
// test84: __builtin_mma_lxvp / __builtin_mma_stxvp with the literal offset
// 32799.
// The FileCheck assertions above expect an i8 getelementptr of 32799 on vpp
// feeding @llvm.ppc.vsx.lxvp, and an i8 getelementptr of 32799 on vp2 feeding
// @llvm.ppc.vsx.stxvp.
1167 void test84(const __vector_pair *vpp, __vector_pair *vp2) {
1168 __vector_pair vp = __builtin_mma_lxvp(32799L, vpp);
1169 __builtin_mma_stxvp(vp, 32799L, vp2);
1172 // CHECK-LABEL: @test85(
1173 // CHECK-NEXT: entry:
1174 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
1175 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 8
1176 // CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]])
1177 // CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC:%.*]], i32 0, i32 0)
1178 // CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
1179 // CHECK-NEXT: ret void
// test85: feeds the result of __builtin_mma_lxvp (literal offset 8) into
// __builtin_mma_pmxvf64gernn. The quad is read through vqp, passed to the
// builtin via &vq with vp, vc and two literal 0 arguments, and stored through
// resp.
// The FileCheck assertions above expect: a <512 x i1> load from vqp, an i8
// getelementptr of 8 on vpp, @llvm.ppc.vsx.lxvp, @llvm.ppc.mma.pmxvf64gernn,
// a store to resp, and ret.
1181 void test85(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
1182 __vector_quad vq = *((__vector_quad *)vqp);
1183 __vector_pair vp = __builtin_mma_lxvp(8L, vpp);
1184 __builtin_mma_pmxvf64gernn(&vq, vp, vc, 0, 0);
1185 *((__vector_quad *)resp) = vq;
1188 // CHECK-LABEL: @test86(
1189 // CHECK-NEXT: entry:
1190 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
1191 // CHECK-NEXT: [[TMP1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[VPP:%.*]])
1192 // CHECK-NEXT: [[TMP2:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
1193 // CHECK-NEXT: store <512 x i1> [[TMP2]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
1194 // CHECK-NEXT: ret void
// test86: feeds the result of __builtin_mma_lxvp (literal offset 0) into
// __builtin_mma_xvf64gernp. The quad is read through vqp, passed to the
// builtin via &vq along with vp and vc, and stored through resp.
// The FileCheck assertions above expect: a <512 x i1> load from vqp,
// @llvm.ppc.vsx.lxvp directly on vpp (no getelementptr),
// @llvm.ppc.mma.xvf64gernp, a store to resp, and ret.
1196 void test86(unsigned char *vqp, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
1197 __vector_quad vq = *((__vector_quad *)vqp);
1198 __vector_pair vp = __builtin_mma_lxvp(0L, vpp);
1199 __builtin_mma_xvf64gernp(&vq, vp, vc);
1200 *((__vector_quad *)resp) = vq;
1203 // CHECK-LABEL: @test87(
1204 // CHECK-NEXT: entry:
1205 // CHECK-NEXT: [[TMP0:%.*]] = load <512 x i1>, ptr [[VQP:%.*]], align 64, !tbaa [[TBAA2]]
1206 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[VPP:%.*]], i64 [[OFFS:%.*]]
1207 // CHECK-NEXT: [[TMP2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr [[TMP1]])
1208 // CHECK-NEXT: [[TMP3:%.*]] = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> [[TMP0]], <256 x i1> [[TMP2]], <16 x i8> [[VC:%.*]])
1209 // CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[RESP:%.*]], align 64, !tbaa [[TBAA2]]
1210 // CHECK-NEXT: ret void
// test87: feeds the result of __builtin_mma_lxvp (run-time offset `offs`)
// into __builtin_mma_xvf64gernp. The quad is read through vqp, passed to the
// builtin via &vq along with vp and vc, and stored through resp.
// The FileCheck assertions above expect: a <512 x i1> load from vqp, an i8
// getelementptr by `offs` on vpp, @llvm.ppc.vsx.lxvp,
// @llvm.ppc.mma.xvf64gernp, a store to resp, and ret.
1212 void test87(unsigned char *vqp, signed long offs, const __vector_pair *vpp, vector unsigned char vc, unsigned char *resp) {
1213 __vector_quad vq = *((__vector_quad *)vqp);
1214 __vector_pair vp = __builtin_mma_lxvp(offs, vpp);
1215 __builtin_mma_xvf64gernp(&vq, vp, vc);
1216 *((__vector_quad *)resp) = vq;