! flang/test/Lower/PowerPC/ppc-mma-outer-product-2.f90
! RUN: %flang_fc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
! REQUIRES: target=powerpc{{.*}}
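
! Checks that the MMA outer-product intrinsic subroutines (mma_xv*ger*) lower
! to the matching llvm.ppc.mma.* intrinsic calls, and that vector operands
! other than <16 x i8> are first bitcast to <16 x i8>.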
subroutine test_xvbf16ger2()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvbf16ger2(cq, vu10, vu11)
end subroutine test_xvbf16ger2

!CHECK-LABEL: @test_xvbf16ger2_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.xvbf16ger2(<16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %6, ptr %1, align 64

subroutine test_xvbf16ger2nn()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvbf16ger2nn(cq, vu10, vu11)
end subroutine test_xvbf16ger2nn

!CHECK-LABEL: @test_xvbf16ger2nn_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvbf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvbf16ger2np()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvbf16ger2np(cq, vu10, vu11)
end subroutine test_xvbf16ger2np

!CHECK-LABEL: @test_xvbf16ger2np_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvbf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvbf16ger2pn()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvbf16ger2pn(cq, vu10, vu11)
end subroutine test_xvbf16ger2pn

!CHECK-LABEL: @test_xvbf16ger2pn_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvbf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvbf16ger2pp()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvbf16ger2pp(cq, vu10, vu11)
end subroutine test_xvbf16ger2pp

!CHECK-LABEL: @test_xvbf16ger2pp_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvbf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvf16ger2()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvf16ger2(cq, vu10, vu11)
end subroutine test_xvf16ger2

!CHECK-LABEL: @test_xvf16ger2_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %6, ptr %1, align 64

subroutine test_xvf16ger2nn()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvf16ger2nn(cq, vu10, vu11)
end subroutine test_xvf16ger2nn

!CHECK-LABEL: @test_xvf16ger2nn_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvf16ger2np()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvf16ger2np(cq, vu10, vu11)
end subroutine test_xvf16ger2np

!CHECK-LABEL: @test_xvf16ger2np_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvf16ger2pn()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvf16ger2pn(cq, vu10, vu11)
end subroutine test_xvf16ger2pn

!CHECK-LABEL: @test_xvf16ger2pn_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvf16ger2pp()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvf16ger2pp(cq, vu10, vu11)
end subroutine test_xvf16ger2pp

!CHECK-LABEL: @test_xvf16ger2pp_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvf32ger_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvf32ger(cq, vu10, vu11)
end subroutine test_xvf32ger_u1

!CHECK-LABEL: @test_xvf32ger_u1_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %6, ptr %1, align 64

subroutine test_xvf32ger_r4()
use, intrinsic :: mma
implicit none
vector(real(4)) vr40, vr41
__vector_quad :: cq
call mma_xvf32ger(cq, vr40, vr41)
end subroutine test_xvf32ger_r4

!CHECK-LABEL: @test_xvf32ger_r4_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <4 x float>, i64 1, align 16
!CHECK: %3 = alloca <4 x float>, i64 1, align 16
!CHECK: %4 = load <4 x float>, ptr %2, align 16
!CHECK: %5 = load <4 x float>, ptr %3, align 16
!CHECK: %6 = bitcast <4 x float> %4 to <16 x i8>
!CHECK: %7 = bitcast <4 x float> %5 to <16 x i8>
!CHECK: %8 = call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %6, <16 x i8> %7)
!CHECK: store <512 x i1> %8, ptr %1, align 64

subroutine test_xvf32gernn_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvf32gernn(cq, vu10, vu11)
end subroutine test_xvf32gernn_u1

!CHECK-LABEL: @test_xvf32gernn_u1_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvf32gernn_r4()
use, intrinsic :: mma
implicit none
vector(real(4)) vr40, vr41
__vector_quad :: cq
call mma_xvf32gernn(cq, vr40, vr41)
end subroutine test_xvf32gernn_r4

!CHECK-LABEL: @test_xvf32gernn_r4_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <4 x float>, i64 1, align 16
!CHECK: %3 = alloca <4 x float>, i64 1, align 16
!CHECK: %4 = load <4 x float>, ptr %2, align 16
!CHECK: %5 = load <4 x float>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = bitcast <4 x float> %4 to <16 x i8>
!CHECK: %8 = bitcast <4 x float> %5 to <16 x i8>
!CHECK: %9 = call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
!CHECK: store <512 x i1> %9, ptr %1, align 64

subroutine test_xvf32gernp_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvf32gernp(cq, vu10, vu11)
end subroutine test_xvf32gernp_u1

!CHECK-LABEL: @test_xvf32gernp_u1_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvf32gernp_r4()
use, intrinsic :: mma
implicit none
vector(real(4)) vr40, vr41
__vector_quad :: cq
call mma_xvf32gernp(cq, vr40, vr41)
end subroutine test_xvf32gernp_r4

!CHECK-LABEL: @test_xvf32gernp_r4_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <4 x float>, i64 1, align 16
!CHECK: %3 = alloca <4 x float>, i64 1, align 16
!CHECK: %4 = load <4 x float>, ptr %2, align 16
!CHECK: %5 = load <4 x float>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = bitcast <4 x float> %4 to <16 x i8>
!CHECK: %8 = bitcast <4 x float> %5 to <16 x i8>
!CHECK: %9 = call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
!CHECK: store <512 x i1> %9, ptr %1, align 64

subroutine test_xvf32gerpn_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvf32gerpn(cq, vu10, vu11)
end subroutine test_xvf32gerpn_u1

!CHECK-LABEL: @test_xvf32gerpn_u1_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvf32gerpn_r4()
use, intrinsic :: mma
implicit none
vector(real(4)) vr40, vr41
__vector_quad :: cq
call mma_xvf32gerpn(cq, vr40, vr41)
end subroutine test_xvf32gerpn_r4

!CHECK-LABEL: @test_xvf32gerpn_r4_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <4 x float>, i64 1, align 16
!CHECK: %3 = alloca <4 x float>, i64 1, align 16
!CHECK: %4 = load <4 x float>, ptr %2, align 16
!CHECK: %5 = load <4 x float>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = bitcast <4 x float> %4 to <16 x i8>
!CHECK: %8 = bitcast <4 x float> %5 to <16 x i8>
!CHECK: %9 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
!CHECK: store <512 x i1> %9, ptr %1, align 64

subroutine test_xvf32gerpp_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvf32gerpp(cq, vu10, vu11)
end subroutine test_xvf32gerpp_u1

!CHECK-LABEL: @test_xvf32gerpp_u1_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvf32gerpp_r4()
use, intrinsic :: mma
implicit none
vector(real(4)) vr40, vr41
__vector_quad :: cq
call mma_xvf32gerpp(cq, vr40, vr41)
end subroutine test_xvf32gerpp_r4

!CHECK-LABEL: @test_xvf32gerpp_r4_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <4 x float>, i64 1, align 16
!CHECK: %3 = alloca <4 x float>, i64 1, align 16
!CHECK: %4 = load <4 x float>, ptr %2, align 16
!CHECK: %5 = load <4 x float>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = bitcast <4 x float> %4 to <16 x i8>
!CHECK: %8 = bitcast <4 x float> %5 to <16 x i8>
!CHECK: %9 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
!CHECK: store <512 x i1> %9, ptr %1, align 64
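
! The xvf64ger* variants below take a __vector_pair (lowered to <256 x i1>)
! as the multiplicand pair, in addition to the __vector_quad accumulator.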
subroutine test_xvf64ger_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10
__vector_quad :: cq
__vector_pair :: cp
call mma_xvf64ger(cq, cp, vu10)
end subroutine test_xvf64ger_u1

!CHECK-LABEL: @test_xvf64ger_u1_
!CHECK: %1 = alloca <256 x i1>, i64 1, align 32
!CHECK: %2 = alloca <512 x i1>, i64 1, align 64
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <256 x i1>, ptr %1, align 32
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %6, ptr %2, align 64

subroutine test_xvf64ger_r8()
use, intrinsic :: mma
implicit none
vector(real(8)) vr80
__vector_quad :: cq
__vector_pair :: cp
call mma_xvf64ger(cq, cp, vr80)
end subroutine test_xvf64ger_r8

!CHECK-LABEL: @test_xvf64ger_r8_
!CHECK: %1 = alloca <256 x i1>, i64 1, align 32
!CHECK: %2 = alloca <512 x i1>, i64 1, align 64
!CHECK: %3 = alloca <2 x double>, i64 1, align 16
!CHECK: %4 = load <256 x i1>, ptr %1, align 32
!CHECK: %5 = load <2 x double>, ptr %3, align 16
!CHECK: %6 = bitcast <2 x double> %5 to <16 x i8>
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %4, <16 x i8> %6)
!CHECK: store <512 x i1> %7, ptr %2, align 64

subroutine test_xvf64gernn_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10
__vector_quad :: cq
__vector_pair :: cp
call mma_xvf64gernn(cq, cp, vu10)
end subroutine test_xvf64gernn_u1

!CHECK-LABEL: @test_xvf64gernn_u1_
!CHECK: %1 = alloca <256 x i1>, i64 1, align 32
!CHECK: %2 = alloca <512 x i1>, i64 1, align 64
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <256 x i1>, ptr %1, align 32
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %2, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %2, align 64

subroutine test_xvf64gernn_r8()
use, intrinsic :: mma
implicit none
vector(real(8)) vr80
__vector_quad :: cq
__vector_pair :: cp
call mma_xvf64gernn(cq, cp, vr80)
end subroutine test_xvf64gernn_r8

!CHECK-LABEL: @test_xvf64gernn_r8_
!CHECK: %1 = alloca <256 x i1>, i64 1, align 32
!CHECK: %2 = alloca <512 x i1>, i64 1, align 64
!CHECK: %3 = alloca <2 x double>, i64 1, align 16
!CHECK: %4 = load <256 x i1>, ptr %1, align 32
!CHECK: %5 = load <2 x double>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %2, align 64
!CHECK: %7 = bitcast <2 x double> %5 to <16 x i8>
!CHECK: %8 = call <512 x i1> @llvm.ppc.mma.xvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7)
!CHECK: store <512 x i1> %8, ptr %2, align 64

subroutine test_xvf64gernp_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10
__vector_quad :: cq
__vector_pair :: cp
call mma_xvf64gernp(cq, cp, vu10)
end subroutine test_xvf64gernp_u1

!CHECK-LABEL: @test_xvf64gernp_u1_
!CHECK: %1 = alloca <256 x i1>, i64 1, align 32
!CHECK: %2 = alloca <512 x i1>, i64 1, align 64
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <256 x i1>, ptr %1, align 32
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %2, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %2, align 64
subroutine test_xvf64gernp_r8()
use, intrinsic :: mma
implicit none
vector(real(8)) vr80
__vector_quad :: cq
__vector_pair :: cp
call mma_xvf64gernp(cq, cp, vr80)
end subroutine test_xvf64gernp_r8

!CHECK-LABEL: @test_xvf64gernp_r8_
!CHECK: %1 = alloca <256 x i1>, i64 1, align 32
!CHECK: %2 = alloca <512 x i1>, i64 1, align 64
!CHECK: %3 = alloca <2 x double>, i64 1, align 16
!CHECK: %4 = load <256 x i1>, ptr %1, align 32
!CHECK: %5 = load <2 x double>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %2, align 64
!CHECK: %7 = bitcast <2 x double> %5 to <16 x i8>
!CHECK: %8 = call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7)
!CHECK: store <512 x i1> %8, ptr %2, align 64
subroutine test_xvf64gerpn_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10
__vector_quad :: cq
__vector_pair :: cp
call mma_xvf64gerpn(cq, cp, vu10)
end subroutine test_xvf64gerpn_u1

!CHECK-LABEL: @test_xvf64gerpn_u1_
!CHECK: %1 = alloca <256 x i1>, i64 1, align 32
!CHECK: %2 = alloca <512 x i1>, i64 1, align 64
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <256 x i1>, ptr %1, align 32
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %2, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %2, align 64

subroutine test_xvf64gerpn_r8()
use, intrinsic :: mma
implicit none
vector(real(8)) vr80
__vector_quad :: cq
__vector_pair :: cp
call mma_xvf64gerpn(cq, cp, vr80)
end subroutine test_xvf64gerpn_r8

!CHECK-LABEL: @test_xvf64gerpn_r8_
!CHECK: %1 = alloca <256 x i1>, i64 1, align 32
!CHECK: %2 = alloca <512 x i1>, i64 1, align 64
!CHECK: %3 = alloca <2 x double>, i64 1, align 16
!CHECK: %4 = load <256 x i1>, ptr %1, align 32
!CHECK: %5 = load <2 x double>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %2, align 64
!CHECK: %7 = bitcast <2 x double> %5 to <16 x i8>
!CHECK: %8 = call <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7)
!CHECK: store <512 x i1> %8, ptr %2, align 64

subroutine test_xvf64gerpp_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10
__vector_quad :: cq
__vector_pair :: cp
call mma_xvf64gerpp(cq, cp, vu10)
end subroutine test_xvf64gerpp_u1

!CHECK-LABEL: @test_xvf64gerpp_u1_
!CHECK: %1 = alloca <256 x i1>, i64 1, align 32
!CHECK: %2 = alloca <512 x i1>, i64 1, align 64
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <256 x i1>, ptr %1, align 32
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %2, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %2, align 64

subroutine test_xvf64gerpp_r8()
use, intrinsic :: mma
implicit none
vector(real(8)) vr80
__vector_quad :: cq
__vector_pair :: cp
call mma_xvf64gerpp(cq, cp, vr80)
end subroutine test_xvf64gerpp_r8

!CHECK-LABEL: @test_xvf64gerpp_r8_
!CHECK: %1 = alloca <256 x i1>, i64 1, align 32
!CHECK: %2 = alloca <512 x i1>, i64 1, align 64
!CHECK: %3 = alloca <2 x double>, i64 1, align 16
!CHECK: %4 = load <256 x i1>, ptr %1, align 32
!CHECK: %5 = load <2 x double>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %2, align 64
!CHECK: %7 = bitcast <2 x double> %5 to <16 x i8>
!CHECK: %8 = call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7)
!CHECK: store <512 x i1> %8, ptr %2, align 64
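
! Integer outer-product variants: xvi16ger2*, xvi4ger8*, and xvi8ger4*.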
subroutine test_xvi16ger2_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvi16ger2(cq, vu10, vu11)
end subroutine test_xvi16ger2_u1

!CHECK-LABEL: @test_xvi16ger2_u1_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %6, ptr %1, align 64

subroutine test_xvi16ger2_i2()
use, intrinsic :: mma
implicit none
vector(integer(2)) vi20, vi21
__vector_quad :: cq
call mma_xvi16ger2(cq, vi20, vi21)
end subroutine test_xvi16ger2_i2

!CHECK-LABEL: @test_xvi16ger2_i2_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <8 x i16>, i64 1, align 16
!CHECK: %3 = alloca <8 x i16>, i64 1, align 16
!CHECK: %4 = load <8 x i16>, ptr %2, align 16
!CHECK: %5 = load <8 x i16>, ptr %3, align 16
!CHECK: %6 = bitcast <8 x i16> %4 to <16 x i8>
!CHECK: %7 = bitcast <8 x i16> %5 to <16 x i8>
!CHECK: %8 = call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> %6, <16 x i8> %7)
!CHECK: store <512 x i1> %8, ptr %1, align 64

subroutine test_xvi16ger2pp_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvi16ger2pp(cq, vu10, vu11)
end subroutine test_xvi16ger2pp_u1

!CHECK-LABEL: @test_xvi16ger2pp_u1_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvi16ger2pp_i2()
use, intrinsic :: mma
implicit none
vector(integer(2)) vi20, vi21
__vector_quad :: cq
call mma_xvi16ger2pp(cq, vi20, vi21)
end subroutine test_xvi16ger2pp_i2

!CHECK-LABEL: @test_xvi16ger2pp_i2_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <8 x i16>, i64 1, align 16
!CHECK: %3 = alloca <8 x i16>, i64 1, align 16
!CHECK: %4 = load <8 x i16>, ptr %2, align 16
!CHECK: %5 = load <8 x i16>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = bitcast <8 x i16> %4 to <16 x i8>
!CHECK: %8 = bitcast <8 x i16> %5 to <16 x i8>
!CHECK: %9 = call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
!CHECK: store <512 x i1> %9, ptr %1, align 64

subroutine test_xvi16ger2s_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvi16ger2s(cq, vu10, vu11)
end subroutine test_xvi16ger2s_u1

!CHECK-LABEL: @test_xvi16ger2s_u1_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %6, ptr %1, align 64

subroutine test_xvi16ger2s_i2()
use, intrinsic :: mma
implicit none
vector(integer(2)) vi20, vi21
__vector_quad :: cq
call mma_xvi16ger2s(cq, vi20, vi21)
end subroutine test_xvi16ger2s_i2

!CHECK-LABEL: @test_xvi16ger2s_i2_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <8 x i16>, i64 1, align 16
!CHECK: %3 = alloca <8 x i16>, i64 1, align 16
!CHECK: %4 = load <8 x i16>, ptr %2, align 16
!CHECK: %5 = load <8 x i16>, ptr %3, align 16
!CHECK: %6 = bitcast <8 x i16> %4 to <16 x i8>
!CHECK: %7 = bitcast <8 x i16> %5 to <16 x i8>
!CHECK: %8 = call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> %6, <16 x i8> %7)
!CHECK: store <512 x i1> %8, ptr %1, align 64

subroutine test_xvi16ger2spp_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvi16ger2spp(cq, vu10, vu11)
end subroutine test_xvi16ger2spp_u1

!CHECK-LABEL: @test_xvi16ger2spp_u1_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvi16ger2spp_i2()
use, intrinsic :: mma
implicit none
vector(integer(2)) vi20, vi21
__vector_quad :: cq
call mma_xvi16ger2spp(cq, vi20, vi21)
end subroutine test_xvi16ger2spp_i2

!CHECK-LABEL: @test_xvi16ger2spp_i2_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <8 x i16>, i64 1, align 16
!CHECK: %3 = alloca <8 x i16>, i64 1, align 16
!CHECK: %4 = load <8 x i16>, ptr %2, align 16
!CHECK: %5 = load <8 x i16>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = bitcast <8 x i16> %4 to <16 x i8>
!CHECK: %8 = bitcast <8 x i16> %5 to <16 x i8>
!CHECK: %9 = call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8)
!CHECK: store <512 x i1> %9, ptr %1, align 64

subroutine test_xvi4ger8()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvi4ger8(cq, vu10, vu11)
end subroutine test_xvi4ger8

!CHECK-LABEL: @test_xvi4ger8_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %6, ptr %1, align 64

subroutine test_xvi4ger8pp()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvi4ger8pp(cq, vu10, vu11)
end subroutine test_xvi4ger8pp

!CHECK-LABEL: @test_xvi4ger8pp_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvi8ger4_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvi8ger4(cq, vu10, vu11)
end subroutine test_xvi8ger4_u1

!CHECK-LABEL: @test_xvi8ger4_u1_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %6, ptr %1, align 64

subroutine test_xvi8ger4_i1()
use, intrinsic :: mma
implicit none
vector(integer(1)) vi10, vi11
__vector_quad :: cq
call mma_xvi8ger4(cq, vi10, vi11)
end subroutine test_xvi8ger4_i1

!CHECK-LABEL: @test_xvi8ger4_i1_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %6, ptr %1, align 64

subroutine test_xvi8ger4pp_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvi8ger4pp(cq, vu10, vu11)
end subroutine test_xvi8ger4pp_u1

!CHECK-LABEL: @test_xvi8ger4pp_u1_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvi8ger4pp_i1()
use, intrinsic :: mma
implicit none
vector(integer(1)) vi10, vi11
__vector_quad :: cq
call mma_xvi8ger4pp(cq, vi10, vi11)
end subroutine test_xvi8ger4pp_i1

!CHECK-LABEL: @test_xvi8ger4pp_i1_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvi8ger4spp_u1()
use, intrinsic :: mma
implicit none
vector(unsigned(1)) vu10, vu11
__vector_quad :: cq
call mma_xvi8ger4spp(cq, vu10, vu11)
end subroutine test_xvi8ger4spp_u1

!CHECK-LABEL: @test_xvi8ger4spp_u1_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64

subroutine test_xvi8ger4spp_i1()
use, intrinsic :: mma
implicit none
vector(integer(1)) vi10, vi11
__vector_quad :: cq
call mma_xvi8ger4spp(cq, vi10, vi11)
end subroutine test_xvi8ger4spp_i1

!CHECK-LABEL: @test_xvi8ger4spp_i1_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5)
!CHECK: store <512 x i1> %7, ptr %1, align 64