! RUN: %flang_fc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
! REQUIRES: target=powerpc{{.*}}
! Lowering of mma_pmxvbf16ger2 with default-kind integer immediates.
subroutine test_pmxvbf16ger2_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvbf16ger2(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvbf16ger2_def

!CHECK-LABEL: @test_pmxvbf16ger2_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %6, ptr %1, align 64
! Same intrinsic with non-default integer kinds; lowering converts all to i32.
subroutine test_pmxvbf16ger2_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvbf16ger2(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvbf16ger2_non_def

!CHECK-LABEL: @test_pmxvbf16ger2_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %6, ptr %1, align 64
! Accumulating variant: the accumulator is loaded, passed, and stored back.
subroutine test_pmxvbf16ger2nn_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvbf16ger2nn(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvbf16ger2nn_def

!CHECK-LABEL: @test_pmxvbf16ger2nn_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvbf16ger2nn_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvbf16ger2nn(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvbf16ger2nn_non_def

!CHECK-LABEL: @test_pmxvbf16ger2nn_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvbf16ger2np_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvbf16ger2np(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvbf16ger2np_def

!CHECK-LABEL: @test_pmxvbf16ger2np_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvbf16ger2np_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvbf16ger2np(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvbf16ger2np_non_def

!CHECK-LABEL: @test_pmxvbf16ger2np_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvbf16ger2pn_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvbf16ger2pn(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvbf16ger2pn_def

!CHECK-LABEL: @test_pmxvbf16ger2pn_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvbf16ger2pn_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvbf16ger2pn(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvbf16ger2pn_non_def

!CHECK-LABEL: @test_pmxvbf16ger2pn_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvbf16ger2pp_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvbf16ger2pp(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvbf16ger2pp_def

!CHECK-LABEL: @test_pmxvbf16ger2pp_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvbf16ger2pp_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvbf16ger2pp(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvbf16ger2pp_non_def

!CHECK-LABEL: @test_pmxvbf16ger2pp_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvbf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf16ger2_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf16ger2(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvf16ger2_def

!CHECK-LABEL: @test_pmxvf16ger2_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %6, ptr %1, align 64
subroutine test_pmxvf16ger2_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf16ger2(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvf16ger2_non_def

!CHECK-LABEL: @test_pmxvf16ger2_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %6, ptr %1, align 64
subroutine test_pmxvf16ger2nn_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf16ger2nn(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvf16ger2nn_def

!CHECK-LABEL: @test_pmxvf16ger2nn_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf16ger2nn_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf16ger2nn(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvf16ger2nn_non_def

!CHECK-LABEL: @test_pmxvf16ger2nn_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf16ger2np_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf16ger2np(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvf16ger2np_def

!CHECK-LABEL: @test_pmxvf16ger2np_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf16ger2np_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf16ger2np(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvf16ger2np_non_def

!CHECK-LABEL: @test_pmxvf16ger2np_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf16ger2pn_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf16ger2pn(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvf16ger2pn_def

!CHECK-LABEL: @test_pmxvf16ger2pn_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf16ger2pn_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf16ger2pn(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvf16ger2pn_non_def

!CHECK-LABEL: @test_pmxvf16ger2pn_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf16ger2pp_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf16ger2pp(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvf16ger2pp_def

!CHECK-LABEL: @test_pmxvf16ger2pp_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf16ger2pp_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf16ger2pp(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvf16ger2pp_non_def

!CHECK-LABEL: @test_pmxvf16ger2pp_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
! f32 variants take only two immediates (xmask, ymask).
subroutine test_pmxvf32ger_u1_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf32ger(cq, vu10, vu11, 7, 2)
end subroutine test_pmxvf32ger_u1_def

!CHECK-LABEL: @test_pmxvf32ger_u1_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
!CHECK: store <512 x i1> %6, ptr %1, align 64
subroutine test_pmxvf32ger_u1_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf32ger(cq, vu10, vu11, 7_2, 2_1)
end subroutine test_pmxvf32ger_u1_non_def

!CHECK-LABEL: @test_pmxvf32ger_u1_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
!CHECK: store <512 x i1> %6, ptr %1, align 64
! real(4) vector arguments are bitcast to <16 x i8> before the intrinsic call.
subroutine test_pmxvf32ger_r4_def()
  use, intrinsic :: mma
  implicit none
  vector(real(4)) vr40, vr41
  __vector_quad :: cq
  call mma_pmxvf32ger(cq, vr40, vr41, 7, 2)
end subroutine test_pmxvf32ger_r4_def

!CHECK-LABEL: @test_pmxvf32ger_r4_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <4 x float>, i64 1, align 16
!CHECK: %3 = alloca <4 x float>, i64 1, align 16
!CHECK: %4 = load <4 x float>, ptr %2, align 16
!CHECK: %5 = load <4 x float>, ptr %3, align 16
!CHECK: %6 = bitcast <4 x float> %4 to <16 x i8>
!CHECK: %7 = bitcast <4 x float> %5 to <16 x i8>
!CHECK: %8 = call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %6, <16 x i8> %7, i32 7, i32 2)
!CHECK: store <512 x i1> %8, ptr %1, align 64
subroutine test_pmxvf32ger_r4_non_def()
  use, intrinsic :: mma
  implicit none
  vector(real(4)) vr40, vr41
  __vector_quad :: cq
  call mma_pmxvf32ger(cq, vr40, vr41, 7_2, 2_1)
end subroutine test_pmxvf32ger_r4_non_def

!CHECK-LABEL: @test_pmxvf32ger_r4_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <4 x float>, i64 1, align 16
!CHECK: %3 = alloca <4 x float>, i64 1, align 16
!CHECK: %4 = load <4 x float>, ptr %2, align 16
!CHECK: %5 = load <4 x float>, ptr %3, align 16
!CHECK: %6 = bitcast <4 x float> %4 to <16 x i8>
!CHECK: %7 = bitcast <4 x float> %5 to <16 x i8>
!CHECK: %8 = call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %6, <16 x i8> %7, i32 7, i32 2)
!CHECK: store <512 x i1> %8, ptr %1, align 64
subroutine test_pmxvf32gernn_u1_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf32gernn(cq, vu10, vu11, 7, 2)
end subroutine test_pmxvf32gernn_u1_def

!CHECK-LABEL: @test_pmxvf32gernn_u1_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf32gernn_u1_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf32gernn(cq, vu10, vu11, 7_2, 2_1)
end subroutine test_pmxvf32gernn_u1_non_def

!CHECK-LABEL: @test_pmxvf32gernn_u1_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf32gernn_r4_def()
  use, intrinsic :: mma
  implicit none
  vector(real(4)) vr40, vr41
  __vector_quad :: cq
  call mma_pmxvf32gernn(cq, vr40, vr41, 7, 2)
end subroutine test_pmxvf32gernn_r4_def

!CHECK-LABEL: @test_pmxvf32gernn_r4_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <4 x float>, i64 1, align 16
!CHECK: %3 = alloca <4 x float>, i64 1, align 16
!CHECK: %4 = load <4 x float>, ptr %2, align 16
!CHECK: %5 = load <4 x float>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = bitcast <4 x float> %4 to <16 x i8>
!CHECK: %8 = bitcast <4 x float> %5 to <16 x i8>
!CHECK: %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
!CHECK: store <512 x i1> %9, ptr %1, align 64
subroutine test_pmxvf32gernn_r4_non_def()
  use, intrinsic :: mma
  implicit none
  vector(real(4)) vr40, vr41
  __vector_quad :: cq
  call mma_pmxvf32gernn(cq, vr40, vr41, 7_2, 2_1)
end subroutine test_pmxvf32gernn_r4_non_def

!CHECK-LABEL: @test_pmxvf32gernn_r4_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <4 x float>, i64 1, align 16
!CHECK: %3 = alloca <4 x float>, i64 1, align 16
!CHECK: %4 = load <4 x float>, ptr %2, align 16
!CHECK: %5 = load <4 x float>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = bitcast <4 x float> %4 to <16 x i8>
!CHECK: %8 = bitcast <4 x float> %5 to <16 x i8>
!CHECK: %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
!CHECK: store <512 x i1> %9, ptr %1, align 64
subroutine test_pmxvf32gernp_u1_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf32gernp(cq, vu10, vu11, 7, 2)
end subroutine test_pmxvf32gernp_u1_def

!CHECK-LABEL: @test_pmxvf32gernp_u1_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf32gernp_u1_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf32gernp(cq, vu10, vu11, 7_2, 2_1)
end subroutine test_pmxvf32gernp_u1_non_def

!CHECK-LABEL: @test_pmxvf32gernp_u1_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf32gernp_r4_def()
  use, intrinsic :: mma
  implicit none
  vector(real(4)) vr40, vr41
  __vector_quad :: cq
  call mma_pmxvf32gernp(cq, vr40, vr41, 7, 2)
end subroutine test_pmxvf32gernp_r4_def

!CHECK-LABEL: @test_pmxvf32gernp_r4_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <4 x float>, i64 1, align 16
!CHECK: %3 = alloca <4 x float>, i64 1, align 16
!CHECK: %4 = load <4 x float>, ptr %2, align 16
!CHECK: %5 = load <4 x float>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = bitcast <4 x float> %4 to <16 x i8>
!CHECK: %8 = bitcast <4 x float> %5 to <16 x i8>
!CHECK: %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
!CHECK: store <512 x i1> %9, ptr %1, align 64
subroutine test_pmxvf32gernp_r4_non_def()
  use, intrinsic :: mma
  implicit none
  vector(real(4)) vr40, vr41
  __vector_quad :: cq
  call mma_pmxvf32gernp(cq, vr40, vr41, 7_2, 2_1)
end subroutine test_pmxvf32gernp_r4_non_def

!CHECK-LABEL: @test_pmxvf32gernp_r4_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <4 x float>, i64 1, align 16
!CHECK: %3 = alloca <4 x float>, i64 1, align 16
!CHECK: %4 = load <4 x float>, ptr %2, align 16
!CHECK: %5 = load <4 x float>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = bitcast <4 x float> %4 to <16 x i8>
!CHECK: %8 = bitcast <4 x float> %5 to <16 x i8>
!CHECK: %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
!CHECK: store <512 x i1> %9, ptr %1, align 64
subroutine test_pmxvf32gerpn_u1_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf32gerpn(cq, vu10, vu11, 7, 2)
end subroutine test_pmxvf32gerpn_u1_def

!CHECK-LABEL: @test_pmxvf32gerpn_u1_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf32gerpn_u1_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf32gerpn(cq, vu10, vu11, 7_2, 2_1)
end subroutine test_pmxvf32gerpn_u1_non_def

!CHECK-LABEL: @test_pmxvf32gerpn_u1_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf32gerpn_r4_def()
  use, intrinsic :: mma
  implicit none
  vector(real(4)) vr40, vr41
  __vector_quad :: cq
  call mma_pmxvf32gerpn(cq, vr40, vr41, 7, 2)
end subroutine test_pmxvf32gerpn_r4_def

!CHECK-LABEL: @test_pmxvf32gerpn_r4_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <4 x float>, i64 1, align 16
!CHECK: %3 = alloca <4 x float>, i64 1, align 16
!CHECK: %4 = load <4 x float>, ptr %2, align 16
!CHECK: %5 = load <4 x float>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = bitcast <4 x float> %4 to <16 x i8>
!CHECK: %8 = bitcast <4 x float> %5 to <16 x i8>
!CHECK: %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
!CHECK: store <512 x i1> %9, ptr %1, align 64
subroutine test_pmxvf32gerpn_r4_non_def()
  use, intrinsic :: mma
  implicit none
  vector(real(4)) vr40, vr41
  __vector_quad :: cq
  call mma_pmxvf32gerpn(cq, vr40, vr41, 7_2, 2_1)
end subroutine test_pmxvf32gerpn_r4_non_def

!CHECK-LABEL: @test_pmxvf32gerpn_r4_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <4 x float>, i64 1, align 16
!CHECK: %3 = alloca <4 x float>, i64 1, align 16
!CHECK: %4 = load <4 x float>, ptr %2, align 16
!CHECK: %5 = load <4 x float>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = bitcast <4 x float> %4 to <16 x i8>
!CHECK: %8 = bitcast <4 x float> %5 to <16 x i8>
!CHECK: %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
!CHECK: store <512 x i1> %9, ptr %1, align 64
subroutine test_pmxvf32gerpp_u1_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf32gerpp(cq, vu10, vu11, 7, 2)
end subroutine test_pmxvf32gerpp_u1_def

!CHECK-LABEL: @test_pmxvf32gerpp_u1_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf32gerpp_u1_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvf32gerpp(cq, vu10, vu11, 7_2, 2_1)
end subroutine test_pmxvf32gerpp_u1_non_def

!CHECK-LABEL: @test_pmxvf32gerpp_u1_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvf32gerpp_r4_def()
  use, intrinsic :: mma
  implicit none
  vector(real(4)) vr40, vr41
  __vector_quad :: cq
  call mma_pmxvf32gerpp(cq, vr40, vr41, 7, 2)
end subroutine test_pmxvf32gerpp_r4_def

!CHECK-LABEL: @test_pmxvf32gerpp_r4_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <4 x float>, i64 1, align 16
!CHECK: %3 = alloca <4 x float>, i64 1, align 16
!CHECK: %4 = load <4 x float>, ptr %2, align 16
!CHECK: %5 = load <4 x float>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = bitcast <4 x float> %4 to <16 x i8>
!CHECK: %8 = bitcast <4 x float> %5 to <16 x i8>
!CHECK: %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
!CHECK: store <512 x i1> %9, ptr %1, align 64
subroutine test_pmxvf32gerpp_r4_non_def()
  use, intrinsic :: mma
  implicit none
  vector(real(4)) vr40, vr41
  __vector_quad :: cq
  call mma_pmxvf32gerpp(cq, vr40, vr41, 7_2, 2_1)
end subroutine test_pmxvf32gerpp_r4_non_def

!CHECK-LABEL: @test_pmxvf32gerpp_r4_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <4 x float>, i64 1, align 16
!CHECK: %3 = alloca <4 x float>, i64 1, align 16
!CHECK: %4 = load <4 x float>, ptr %2, align 16
!CHECK: %5 = load <4 x float>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = bitcast <4 x float> %4 to <16 x i8>
!CHECK: %8 = bitcast <4 x float> %5 to <16 x i8>
!CHECK: %9 = call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 2)
!CHECK: store <512 x i1> %9, ptr %1, align 64
! f64 variants take a __vector_pair operand (lowered as <256 x i1>) plus one vector.
subroutine test_pmxvf64ger_u1_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10
  __vector_quad :: cq
  __vector_pair :: cp
  call mma_pmxvf64ger(cq, cp, vu10, 7, 2)
end subroutine test_pmxvf64ger_u1_def

!CHECK-LABEL: @test_pmxvf64ger_u1_def_
!CHECK: %1 = alloca <256 x i1>, i64 1, align 32
!CHECK: %2 = alloca <512 x i1>, i64 1, align 64
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <256 x i1>, ptr %1, align 32
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
!CHECK: store <512 x i1> %6, ptr %2, align 64
subroutine test_pmxvf64ger_u1_non_def()
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10
  __vector_quad :: cq
  __vector_pair :: cp
  call mma_pmxvf64ger(cq, cp, vu10, 7_2, 2_1)
end subroutine test_pmxvf64ger_u1_non_def

!CHECK-LABEL: @test_pmxvf64ger_u1_non_def_
!CHECK: %1 = alloca <256 x i1>, i64 1, align 32
!CHECK: %2 = alloca <512 x i1>, i64 1, align 64
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <256 x i1>, ptr %1, align 32
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
!CHECK: store <512 x i1> %6, ptr %2, align 64
! real(8) vector argument is bitcast to <16 x i8> before the intrinsic call.
subroutine test_pmxvf64ger_r8_def()
  use, intrinsic :: mma
  implicit none
  vector(real(8)) vr80
  __vector_quad :: cq
  __vector_pair :: cp
  call mma_pmxvf64ger(cq, cp, vr80, 7, 2)
end subroutine test_pmxvf64ger_r8_def

!CHECK-LABEL: @test_pmxvf64ger_r8_def_
!CHECK: %1 = alloca <256 x i1>, i64 1, align 32
!CHECK: %2 = alloca <512 x i1>, i64 1, align 64
!CHECK: %3 = alloca <2 x double>, i64 1, align 16
!CHECK: %4 = load <256 x i1>, ptr %1, align 32
!CHECK: %5 = load <2 x double>, ptr %3, align 16
!CHECK: %6 = bitcast <2 x double> %5 to <16 x i8>
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %4, <16 x i8> %6, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %2, align 64
subroutine test_pmxvf64ger_r8_non_def()
  use, intrinsic :: mma
  implicit none
  vector(real(8)) vr80
  __vector_quad :: cq
  __vector_pair :: cp
  call mma_pmxvf64ger(cq, cp, vr80, 7_2, 2_1)
end subroutine test_pmxvf64ger_r8_non_def

!CHECK-LABEL: @test_pmxvf64ger_r8_non_def_
!CHECK: %1 = alloca <256 x i1>, i64 1, align 32
!CHECK: %2 = alloca <512 x i1>, i64 1, align 64
!CHECK: %3 = alloca <2 x double>, i64 1, align 16
!CHECK: %4 = load <256 x i1>, ptr %1, align 32
!CHECK: %5 = load <2 x double>, ptr %3, align 16
!CHECK: %6 = bitcast <2 x double> %5 to <16 x i8>
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %4, <16 x i8> %6, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %2, align 64
subroutine test_pmxvf64gernn_u1_def()
  ! Lowers accumulating mma_pmxvf64gernn (acc is read and written: CHECK shows
  ! a <512 x i1> load feeding the intrinsic and a store back).
  ! NOTE(review): cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gernn(cq, cp, vu10, 7, 2)
end subroutine test_pmxvf64gernn_u1_def
821 !CHECK-LABEL: @test_pmxvf64gernn_u1_def_
822 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
823 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
824 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
825 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
826 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
827 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
828 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
829 !CHECK: store <512 x i1> %7, ptr %2, align 64
subroutine test_pmxvf64gernn_u1_non_def()
  ! mma_pmxvf64gernn with non-default integer-kind mask arguments.
  ! NOTE(review): cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gernn(cq, cp, vu10, 7_2, 2_1)
end subroutine test_pmxvf64gernn_u1_non_def
840 !CHECK-LABEL: @test_pmxvf64gernn_u1_non_def_
841 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
842 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
843 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
844 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
845 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
846 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
847 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
848 !CHECK: store <512 x i1> %7, ptr %2, align 64
subroutine test_pmxvf64gernn_r8_def()
  ! mma_pmxvf64gernn with a vector(real(8)) operand; CHECK expects a bitcast
  ! of <2 x double> to <16 x i8> before the intrinsic call.
  ! NOTE(review): vr80/cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(real(8)) vr80
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gernn(cq, cp, vr80, 7, 2)
end subroutine test_pmxvf64gernn_r8_def
859 !CHECK-LABEL: @test_pmxvf64gernn_r8_def_
860 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
861 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
862 !CHECK: %3 = alloca <2 x double>, i64 1, align 16
863 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
864 !CHECK: %5 = load <2 x double>, ptr %3, align 16
865 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
866 !CHECK: %7 = bitcast <2 x double> %5 to <16 x i8>
867 !CHECK: %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
868 !CHECK: store <512 x i1> %8, ptr %2, align 64
subroutine test_pmxvf64gernn_r8_non_def()
  ! mma_pmxvf64gernn, real(8) operand, non-default integer-kind mask args.
  ! NOTE(review): vr80/cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(real(8)) vr80
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gernn(cq, cp, vr80, 7_2, 2_1)
end subroutine test_pmxvf64gernn_r8_non_def
879 !CHECK-LABEL: @test_pmxvf64gernn_r8_non_def_
880 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
881 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
882 !CHECK: %3 = alloca <2 x double>, i64 1, align 16
883 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
884 !CHECK: %5 = load <2 x double>, ptr %3, align 16
885 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
886 !CHECK: %7 = bitcast <2 x double> %5 to <16 x i8>
887 !CHECK: %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
888 !CHECK: store <512 x i1> %8, ptr %2, align 64
subroutine test_pmxvf64gernp_u1_def()
  ! Lowers accumulating mma_pmxvf64gernp to llvm.ppc.mma.pmxvf64gernp.
  ! NOTE(review): cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gernp(cq, cp, vu10, 7, 2)
end subroutine test_pmxvf64gernp_u1_def
899 !CHECK-LABEL: @test_pmxvf64gernp_u1_def_
900 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
901 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
902 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
903 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
904 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
905 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
906 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
907 !CHECK: store <512 x i1> %7, ptr %2, align 64
subroutine test_pmxvf64gernp_u1_non_def()
  ! mma_pmxvf64gernp with non-default integer-kind mask arguments.
  ! NOTE(review): cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gernp(cq, cp, vu10, 7_2, 2_1)
end subroutine test_pmxvf64gernp_u1_non_def
918 !CHECK-LABEL: @test_pmxvf64gernp_u1_non_def_
919 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
920 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
921 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
922 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
923 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
924 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
925 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
926 !CHECK: store <512 x i1> %7, ptr %2, align 64
subroutine test_pmxvf64gernp_r8_def()
  ! mma_pmxvf64gernp with a vector(real(8)) operand (default int args).
  ! NOTE(review): vr80/cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(real(8)) vr80
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gernp(cq, cp, vr80, 7, 2)
end subroutine test_pmxvf64gernp_r8_def
937 !CHECK-LABEL: @test_pmxvf64gernp_r8_def_
938 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
939 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
940 !CHECK: %3 = alloca <2 x double>, i64 1, align 16
941 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
942 !CHECK: %5 = load <2 x double>, ptr %3, align 16
943 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
944 !CHECK: %7 = bitcast <2 x double> %5 to <16 x i8>
945 !CHECK: %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
946 !CHECK: store <512 x i1> %8, ptr %2, align 64
subroutine test_pmxvf64gernp_r8_non_def()
  ! mma_pmxvf64gernp, real(8) operand, non-default integer-kind mask args.
  ! NOTE(review): vr80/cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(real(8)) vr80
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gernp(cq, cp, vr80, 7_2, 2_1)
end subroutine test_pmxvf64gernp_r8_non_def
957 !CHECK-LABEL: @test_pmxvf64gernp_r8_non_def_
958 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
959 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
960 !CHECK: %3 = alloca <2 x double>, i64 1, align 16
961 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
962 !CHECK: %5 = load <2 x double>, ptr %3, align 16
963 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
964 !CHECK: %7 = bitcast <2 x double> %5 to <16 x i8>
965 !CHECK: %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
966 !CHECK: store <512 x i1> %8, ptr %2, align 64
subroutine test_pmxvf64gerpn_u1_def()
  ! Lowers accumulating mma_pmxvf64gerpn to llvm.ppc.mma.pmxvf64gerpn.
  ! NOTE(review): cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gerpn(cq, cp, vu10, 7, 2)
end subroutine test_pmxvf64gerpn_u1_def
977 !CHECK-LABEL: @test_pmxvf64gerpn_u1_def_
978 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
979 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
980 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
981 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
982 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
983 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
984 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
985 !CHECK: store <512 x i1> %7, ptr %2, align 64
subroutine test_pmxvf64gerpn_u1_non_def()
  ! mma_pmxvf64gerpn with non-default integer-kind mask arguments.
  ! NOTE(review): cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gerpn(cq, cp, vu10, 7_2, 2_1)
end subroutine test_pmxvf64gerpn_u1_non_def
996 !CHECK-LABEL: @test_pmxvf64gerpn_u1_non_def_
997 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
998 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
999 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1000 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
1001 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1002 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
1003 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
1004 !CHECK: store <512 x i1> %7, ptr %2, align 64
subroutine test_pmxvf64gerpn_r8_def()
  ! mma_pmxvf64gerpn with a vector(real(8)) operand (default int args).
  ! NOTE(review): cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(real(8)) vr80
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gerpn(cq, cp, vr80, 7, 2)
end subroutine test_pmxvf64gerpn_r8_def
1015 !CHECK-LABEL: @test_pmxvf64gerpn_r8_def_
1016 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
1017 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
1018 !CHECK: %3 = alloca <2 x double>, i64 1, align 16
1019 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
1020 !CHECK: %5 = load <2 x double>, ptr %3, align 16
1021 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
1022 !CHECK: %7 = bitcast <2 x double> %5 to <16 x i8>
1023 !CHECK: %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
1024 !CHECK: store <512 x i1> %8, ptr %2, align 64
subroutine test_pmxvf64gerpn_r8_non_def()
  ! mma_pmxvf64gerpn, real(8) operand, non-default integer-kind mask args.
  ! NOTE(review): cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(real(8)) vr80
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gerpn(cq, cp, vr80, 7_2, 2_1)
end subroutine test_pmxvf64gerpn_r8_non_def
1035 !CHECK-LABEL: @test_pmxvf64gerpn_r8_non_def_
1036 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
1037 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
1038 !CHECK: %3 = alloca <2 x double>, i64 1, align 16
1039 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
1040 !CHECK: %5 = load <2 x double>, ptr %3, align 16
1041 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
1042 !CHECK: %7 = bitcast <2 x double> %5 to <16 x i8>
1043 !CHECK: %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
1044 !CHECK: store <512 x i1> %8, ptr %2, align 64
subroutine test_pmxvf64gerpp_u1_def()
  ! Lowers accumulating mma_pmxvf64gerpp to llvm.ppc.mma.pmxvf64gerpp.
  ! NOTE(review): cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gerpp(cq, cp, vu10, 7, 2)
end subroutine test_pmxvf64gerpp_u1_def
1055 !CHECK-LABEL: @test_pmxvf64gerpp_u1_def_
1056 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
1057 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
1058 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1059 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
1060 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1061 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
1062 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
1063 !CHECK: store <512 x i1> %7, ptr %2, align 64
subroutine test_pmxvf64gerpp_u1_non_def()
  ! mma_pmxvf64gerpp with non-default integer-kind mask arguments.
  ! NOTE(review): cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gerpp(cq, cp, vu10, 7_2, 2_1)
end subroutine test_pmxvf64gerpp_u1_non_def
1074 !CHECK-LABEL: @test_pmxvf64gerpp_u1_non_def_
1075 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
1076 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
1077 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1078 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
1079 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1080 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
1081 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %5, i32 7, i32 2)
1082 !CHECK: store <512 x i1> %7, ptr %2, align 64
subroutine test_pmxvf64gerpp_r8_def()
  ! mma_pmxvf64gerpp with a vector(real(8)) operand (default int args).
  ! NOTE(review): cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(real(8)) vr80
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gerpp(cq, cp, vr80, 7, 2)
end subroutine test_pmxvf64gerpp_r8_def
1093 !CHECK-LABEL: @test_pmxvf64gerpp_r8_def_
1094 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
1095 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
1096 !CHECK: %3 = alloca <2 x double>, i64 1, align 16
1097 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
1098 !CHECK: %5 = load <2 x double>, ptr %3, align 16
1099 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
1100 !CHECK: %7 = bitcast <2 x double> %5 to <16 x i8>
1101 !CHECK: %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
1102 !CHECK: store <512 x i1> %8, ptr %2, align 64
subroutine test_pmxvf64gerpp_r8_non_def()
  ! mma_pmxvf64gerpp, real(8) operand, non-default integer-kind mask args.
  ! NOTE(review): cp/cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(real(8)) vr80
  __vector_pair :: cp
  __vector_quad :: cq
  call mma_pmxvf64gerpp(cq, cp, vr80, 7_2, 2_1)
end subroutine test_pmxvf64gerpp_r8_non_def
1113 !CHECK-LABEL: @test_pmxvf64gerpp_r8_non_def_
1114 !CHECK: %1 = alloca <256 x i1>, i64 1, align 32
1115 !CHECK: %2 = alloca <512 x i1>, i64 1, align 64
1116 !CHECK: %3 = alloca <2 x double>, i64 1, align 16
1117 !CHECK: %4 = load <256 x i1>, ptr %1, align 32
1118 !CHECK: %5 = load <2 x double>, ptr %3, align 16
1119 !CHECK: %6 = load <512 x i1>, ptr %2, align 64
1120 !CHECK: %7 = bitcast <2 x double> %5 to <16 x i8>
1121 !CHECK: %8 = call <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1> %6, <256 x i1> %4, <16 x i8> %7, i32 7, i32 2)
1122 !CHECK: store <512 x i1> %8, ptr %2, align 64
subroutine test_pmxvi16ger2_u1_def()
  ! Lowers mma_pmxvi16ger2 (unsigned(1) operands, default int args) to
  ! llvm.ppc.mma.pmxvi16ger2.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi16ger2(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvi16ger2_u1_def
1132 !CHECK-LABEL: @test_pmxvi16ger2_u1_def_
1133 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1134 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1135 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1136 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1137 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1138 !CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1139 !CHECK: store <512 x i1> %6, ptr %1, align 64
subroutine test_pmxvi16ger2_u1_non_def()
  ! mma_pmxvi16ger2 with non-default integer-kind mask args (7_2, 7_1, 2_8).
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi16ger2(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvi16ger2_u1_non_def
1149 !CHECK-LABEL: @test_pmxvi16ger2_u1_non_def_
1150 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1151 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1152 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1153 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1154 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1155 !CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1156 !CHECK: store <512 x i1> %6, ptr %1, align 64
subroutine test_pmxvi16ger2_i2_def()
  ! mma_pmxvi16ger2 with integer(2) vectors; CHECK expects <8 x i16> operands
  ! bitcast to <16 x i8> before the intrinsic call.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(integer(2)) vi20, vi21
  __vector_quad :: cq
  call mma_pmxvi16ger2(cq, vi20, vi21, 7, 7, 2)
end subroutine test_pmxvi16ger2_i2_def
1166 !CHECK-LABEL: @test_pmxvi16ger2_i2_def_
1167 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1168 !CHECK: %2 = alloca <8 x i16>, i64 1, align 16
1169 !CHECK: %3 = alloca <8 x i16>, i64 1, align 16
1170 !CHECK: %4 = load <8 x i16>, ptr %2, align 16
1171 !CHECK: %5 = load <8 x i16>, ptr %3, align 16
1172 !CHECK: %6 = bitcast <8 x i16> %4 to <16 x i8>
1173 !CHECK: %7 = bitcast <8 x i16> %5 to <16 x i8>
1174 !CHECK: %8 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %6, <16 x i8> %7, i32 7, i32 7, i32 2)
1175 !CHECK: store <512 x i1> %8, ptr %1, align 64
subroutine test_pmxvi16ger2_i2_non_def()
  ! mma_pmxvi16ger2, integer(2) vectors, non-default integer-kind mask args.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(integer(2)) vi20, vi21
  __vector_quad :: cq
  call mma_pmxvi16ger2(cq, vi20, vi21, 7_2, 7_1, 2_8)
end subroutine test_pmxvi16ger2_i2_non_def
1185 !CHECK-LABEL: @test_pmxvi16ger2_i2_non_def_
1186 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1187 !CHECK: %2 = alloca <8 x i16>, i64 1, align 16
1188 !CHECK: %3 = alloca <8 x i16>, i64 1, align 16
1189 !CHECK: %4 = load <8 x i16>, ptr %2, align 16
1190 !CHECK: %5 = load <8 x i16>, ptr %3, align 16
1191 !CHECK: %6 = bitcast <8 x i16> %4 to <16 x i8>
1192 !CHECK: %7 = bitcast <8 x i16> %5 to <16 x i8>
1193 !CHECK: %8 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %6, <16 x i8> %7, i32 7, i32 7, i32 2)
1194 !CHECK: store <512 x i1> %8, ptr %1, align 64
subroutine test_pmxvi16ger2pp_u1_def()
  ! Lowers accumulating mma_pmxvi16ger2pp (acc loaded and stored back).
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi16ger2pp(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvi16ger2pp_u1_def
1204 !CHECK-LABEL: @test_pmxvi16ger2pp_u1_def_
1205 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1206 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1207 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1208 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1209 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1210 !CHECK: %6 = load <512 x i1>, ptr %1, align 64
1211 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1212 !CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvi16ger2pp_u1_non_def()
  ! mma_pmxvi16ger2pp with non-default integer-kind mask arguments.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi16ger2pp(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvi16ger2pp_u1_non_def
1222 !CHECK-LABEL: @test_pmxvi16ger2pp_u1_non_def_
1223 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1224 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1225 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1226 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1227 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1228 !CHECK: %6 = load <512 x i1>, ptr %1, align 64
1229 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1230 !CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvi16ger2pp_i2_def()
  ! mma_pmxvi16ger2pp with integer(2) vectors (bitcast to <16 x i8> expected).
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(integer(2)) vi20, vi21
  __vector_quad :: cq
  call mma_pmxvi16ger2pp(cq, vi20, vi21, 7, 7, 2)
end subroutine test_pmxvi16ger2pp_i2_def
1240 !CHECK-LABEL: @test_pmxvi16ger2pp_i2_def_
1241 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1242 !CHECK: %2 = alloca <8 x i16>, i64 1, align 16
1243 !CHECK: %3 = alloca <8 x i16>, i64 1, align 16
1244 !CHECK: %4 = load <8 x i16>, ptr %2, align 16
1245 !CHECK: %5 = load <8 x i16>, ptr %3, align 16
1246 !CHECK: %6 = load <512 x i1>, ptr %1, align 64
1247 !CHECK: %7 = bitcast <8 x i16> %4 to <16 x i8>
1248 !CHECK: %8 = bitcast <8 x i16> %5 to <16 x i8>
1249 !CHECK: %9 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 7, i32 2)
1250 !CHECK: store <512 x i1> %9, ptr %1, align 64
subroutine test_pmxvi16ger2pp_i2_non_def()
  ! mma_pmxvi16ger2pp, integer(2) vectors, non-default integer-kind mask args.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(integer(2)) vi20, vi21
  __vector_quad :: cq
  call mma_pmxvi16ger2pp(cq, vi20, vi21, 7_2, 7_1, 2_8)
end subroutine test_pmxvi16ger2pp_i2_non_def
1260 !CHECK-LABEL: @test_pmxvi16ger2pp_i2_non_def_
1261 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1262 !CHECK: %2 = alloca <8 x i16>, i64 1, align 16
1263 !CHECK: %3 = alloca <8 x i16>, i64 1, align 16
1264 !CHECK: %4 = load <8 x i16>, ptr %2, align 16
1265 !CHECK: %5 = load <8 x i16>, ptr %3, align 16
1266 !CHECK: %6 = load <512 x i1>, ptr %1, align 64
1267 !CHECK: %7 = bitcast <8 x i16> %4 to <16 x i8>
1268 !CHECK: %8 = bitcast <8 x i16> %5 to <16 x i8>
1269 !CHECK: %9 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 7, i32 2)
1270 !CHECK: store <512 x i1> %9, ptr %1, align 64
subroutine test_pmxvi16ger2s_u1_def()
  ! Lowers saturating mma_pmxvi16ger2s to llvm.ppc.mma.pmxvi16ger2s.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi16ger2s(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvi16ger2s_u1_def
1280 !CHECK-LABEL: @test_pmxvi16ger2s_u1_def_
1281 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1282 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1283 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1284 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1285 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1286 !CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1287 !CHECK: store <512 x i1> %6, ptr %1, align 64
subroutine test_pmxvi16ger2s_u1_non_def()
  ! mma_pmxvi16ger2s with non-default integer-kind mask arguments.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi16ger2s(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvi16ger2s_u1_non_def
1297 !CHECK-LABEL: @test_pmxvi16ger2s_u1_non_def_
1298 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1299 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1300 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1301 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1302 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1303 !CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1304 !CHECK: store <512 x i1> %6, ptr %1, align 64
subroutine test_pmxvi16ger2s_i2_def()
  ! mma_pmxvi16ger2s with integer(2) vectors (bitcast to <16 x i8> expected).
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(integer(2)) vi20, vi21
  __vector_quad :: cq
  call mma_pmxvi16ger2s(cq, vi20, vi21, 7, 7, 2)
end subroutine test_pmxvi16ger2s_i2_def
1314 !CHECK-LABEL: @test_pmxvi16ger2s_i2_def_
1315 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1316 !CHECK: %2 = alloca <8 x i16>, i64 1, align 16
1317 !CHECK: %3 = alloca <8 x i16>, i64 1, align 16
1318 !CHECK: %4 = load <8 x i16>, ptr %2, align 16
1319 !CHECK: %5 = load <8 x i16>, ptr %3, align 16
1320 !CHECK: %6 = bitcast <8 x i16> %4 to <16 x i8>
1321 !CHECK: %7 = bitcast <8 x i16> %5 to <16 x i8>
1322 !CHECK: %8 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %6, <16 x i8> %7, i32 7, i32 7, i32 2)
1323 !CHECK: store <512 x i1> %8, ptr %1, align 64
subroutine test_pmxvi16ger2s_i2_non_def()
  ! mma_pmxvi16ger2s, integer(2) vectors, non-default integer-kind mask args.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(integer(2)) vi20, vi21
  __vector_quad :: cq
  call mma_pmxvi16ger2s(cq, vi20, vi21, 7_2, 7_1, 2_8)
end subroutine test_pmxvi16ger2s_i2_non_def
1333 !CHECK-LABEL: @test_pmxvi16ger2s_i2_non_def_
1334 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1335 !CHECK: %2 = alloca <8 x i16>, i64 1, align 16
1336 !CHECK: %3 = alloca <8 x i16>, i64 1, align 16
1337 !CHECK: %4 = load <8 x i16>, ptr %2, align 16
1338 !CHECK: %5 = load <8 x i16>, ptr %3, align 16
1339 !CHECK: %6 = bitcast <8 x i16> %4 to <16 x i8>
1340 !CHECK: %7 = bitcast <8 x i16> %5 to <16 x i8>
1341 !CHECK: %8 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %6, <16 x i8> %7, i32 7, i32 7, i32 2)
1342 !CHECK: store <512 x i1> %8, ptr %1, align 64
subroutine test_pmxvi16ger2spp_u1_def()
  ! Lowers saturating-accumulating mma_pmxvi16ger2spp.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi16ger2spp(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvi16ger2spp_u1_def
1352 !CHECK-LABEL: @test_pmxvi16ger2spp_u1_def_
1353 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1354 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1355 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1356 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1357 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1358 !CHECK: %6 = load <512 x i1>, ptr %1, align 64
1359 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1360 !CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvi16ger2spp_u1_non_def()
  ! mma_pmxvi16ger2spp with non-default integer-kind mask arguments.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi16ger2spp(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvi16ger2spp_u1_non_def
1370 !CHECK-LABEL: @test_pmxvi16ger2spp_u1_non_def_
1371 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1372 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1373 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1374 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1375 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1376 !CHECK: %6 = load <512 x i1>, ptr %1, align 64
1377 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1378 !CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvi16ger2spp_i2_def()
  ! mma_pmxvi16ger2spp with integer(2) vectors (bitcast to <16 x i8> expected).
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(integer(2)) vi20, vi21
  __vector_quad :: cq
  call mma_pmxvi16ger2spp(cq, vi20, vi21, 7, 7, 2)
end subroutine test_pmxvi16ger2spp_i2_def
1388 !CHECK-LABEL: @test_pmxvi16ger2spp_i2_def_
1389 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1390 !CHECK: %2 = alloca <8 x i16>, i64 1, align 16
1391 !CHECK: %3 = alloca <8 x i16>, i64 1, align 16
1392 !CHECK: %4 = load <8 x i16>, ptr %2, align 16
1393 !CHECK: %5 = load <8 x i16>, ptr %3, align 16
1394 !CHECK: %6 = load <512 x i1>, ptr %1, align 64
1395 !CHECK: %7 = bitcast <8 x i16> %4 to <16 x i8>
1396 !CHECK: %8 = bitcast <8 x i16> %5 to <16 x i8>
1397 !CHECK: %9 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 7, i32 2)
1398 !CHECK: store <512 x i1> %9, ptr %1, align 64
subroutine test_pmxvi16ger2spp_i2_non_def()
  ! mma_pmxvi16ger2spp, integer(2) vectors, non-default integer-kind mask args.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(integer(2)) vi20, vi21
  __vector_quad :: cq
  call mma_pmxvi16ger2spp(cq, vi20, vi21, 7_2, 7_1, 2_8)
end subroutine test_pmxvi16ger2spp_i2_non_def
1408 !CHECK-LABEL: @test_pmxvi16ger2spp_i2_non_def_
1409 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1410 !CHECK: %2 = alloca <8 x i16>, i64 1, align 16
1411 !CHECK: %3 = alloca <8 x i16>, i64 1, align 16
1412 !CHECK: %4 = load <8 x i16>, ptr %2, align 16
1413 !CHECK: %5 = load <8 x i16>, ptr %3, align 16
1414 !CHECK: %6 = load <512 x i1>, ptr %1, align 64
1415 !CHECK: %7 = bitcast <8 x i16> %4 to <16 x i8>
1416 !CHECK: %8 = bitcast <8 x i16> %5 to <16 x i8>
1417 !CHECK: %9 = call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %6, <16 x i8> %7, <16 x i8> %8, i32 7, i32 7, i32 2)
1418 !CHECK: store <512 x i1> %9, ptr %1, align 64
subroutine test_pmxvi4ger8_def()
  ! Lowers mma_pmxvi4ger8 (default int args) to llvm.ppc.mma.pmxvi4ger8.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi4ger8(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvi4ger8_def
1429 !CHECK-LABEL: @test_pmxvi4ger8_def_
1430 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1431 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1432 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1433 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1434 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1435 !CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1436 !CHECK: store <512 x i1> %6, ptr %1, align 64
subroutine test_pmxvi4ger8_non_def()
  ! mma_pmxvi4ger8 with non-default integer-kind mask arguments.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi4ger8(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvi4ger8_non_def
1446 !CHECK-LABEL: @test_pmxvi4ger8_non_def_
1447 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1448 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1449 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1450 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1451 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1452 !CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1453 !CHECK: store <512 x i1> %6, ptr %1, align 64
subroutine test_pmxvi4ger8pp_def()
  ! Lowers accumulating mma_pmxvi4ger8pp (acc loaded and stored back).
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi4ger8pp(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvi4ger8pp_def
1463 !CHECK-LABEL: @test_pmxvi4ger8pp_def_
1464 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1465 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1466 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1467 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1468 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1469 !CHECK: %6 = load <512 x i1>, ptr %1, align 64
1470 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1471 !CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvi4ger8pp_non_def()
  ! mma_pmxvi4ger8pp with non-default integer-kind mask arguments.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi4ger8pp(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvi4ger8pp_non_def
1481 !CHECK-LABEL: @test_pmxvi4ger8pp_non_def_
1482 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1483 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1484 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1485 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1486 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1487 !CHECK: %6 = load <512 x i1>, ptr %1, align 64
1488 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1489 !CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvi8ger4_u1_def()
  ! Lowers mma_pmxvi8ger4 (unsigned(1) operands) to llvm.ppc.mma.pmxvi8ger4.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi8ger4(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvi8ger4_u1_def
1499 !CHECK-LABEL: @test_pmxvi8ger4_u1_def_
1500 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1501 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1502 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1503 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1504 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1505 !CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1506 !CHECK: store <512 x i1> %6, ptr %1, align 64
subroutine test_pmxvi8ger4_u1_non_def()
  ! mma_pmxvi8ger4 with non-default integer-kind mask arguments.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi8ger4(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvi8ger4_u1_non_def
1516 !CHECK-LABEL: @test_pmxvi8ger4_u1_non_def_
1517 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1518 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1519 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1520 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1521 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1522 !CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1523 !CHECK: store <512 x i1> %6, ptr %1, align 64
subroutine test_pmxvi8ger4_i1_def()
  ! mma_pmxvi8ger4 with integer(1) vectors (same <16 x i8> lowering as u1).
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(integer(1)) vi10, vi11
  __vector_quad :: cq
  call mma_pmxvi8ger4(cq, vi10, vi11, 7, 7, 2)
end subroutine test_pmxvi8ger4_i1_def
1533 !CHECK-LABEL: @test_pmxvi8ger4_i1_def_
1534 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1535 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1536 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1537 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1538 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1539 !CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1540 !CHECK: store <512 x i1> %6, ptr %1, align 64
subroutine test_pmxvi8ger4_i1_non_def()
  ! mma_pmxvi8ger4, integer(1) vectors, non-default integer-kind mask args.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(integer(1)) vi10, vi11
  __vector_quad :: cq
  call mma_pmxvi8ger4(cq, vi10, vi11, 7_2, 7_1, 2_8)
end subroutine test_pmxvi8ger4_i1_non_def
1550 !CHECK-LABEL: @test_pmxvi8ger4_i1_non_def_
1551 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1552 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1553 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1554 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1555 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1556 !CHECK: %6 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1557 !CHECK: store <512 x i1> %6, ptr %1, align 64
subroutine test_pmxvi8ger4pp_u1_def()
  ! Lowers accumulating mma_pmxvi8ger4pp (acc loaded and stored back).
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi8ger4pp(cq, vu10, vu11, 7, 7, 2)
end subroutine test_pmxvi8ger4pp_u1_def
1567 !CHECK-LABEL: @test_pmxvi8ger4pp_u1_def_
1568 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1569 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1570 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1571 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1572 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1573 !CHECK: %6 = load <512 x i1>, ptr %1, align 64
1574 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1575 !CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvi8ger4pp_u1_non_def()
  ! mma_pmxvi8ger4pp with non-default integer-kind mask arguments.
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(unsigned(1)) vu10, vu11
  __vector_quad :: cq
  call mma_pmxvi8ger4pp(cq, vu10, vu11, 7_2, 7_1, 2_8)
end subroutine test_pmxvi8ger4pp_u1_non_def
1585 !CHECK-LABEL: @test_pmxvi8ger4pp_u1_non_def_
1586 !CHECK: %1 = alloca <512 x i1>, i64 1, align 64
1587 !CHECK: %2 = alloca <16 x i8>, i64 1, align 16
1588 !CHECK: %3 = alloca <16 x i8>, i64 1, align 16
1589 !CHECK: %4 = load <16 x i8>, ptr %2, align 16
1590 !CHECK: %5 = load <16 x i8>, ptr %3, align 16
1591 !CHECK: %6 = load <512 x i1>, ptr %1, align 64
1592 !CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
1593 !CHECK: store <512 x i1> %7, ptr %1, align 64
subroutine test_pmxvi8ger4pp_i1_def()
  ! mma_pmxvi8ger4pp with integer(1) vectors (default int args).
  ! NOTE(review): cq/implicit-none decls restored (lost in extraction).
  use, intrinsic :: mma
  implicit none
  vector(integer(1)) vi10, vi11
  __vector_quad :: cq
  call mma_pmxvi8ger4pp(cq, vi10, vi11, 7, 7, 2)
end subroutine test_pmxvi8ger4pp_i1_def
!CHECK-LABEL: @test_pmxvi8ger4pp_i1_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
1613 subroutine test_pmxvi8ger4pp_i1_non_def()
1614 use, intrinsic :: mma
1616 vector(integer(1)) vi10
, vi11
1618 call mma_pmxvi8ger4pp(cq
, vi10
, vi11
, 7_2, 7_1, 2_8)
1619 end subroutine test_pmxvi8ger4pp_i1_non_def
!CHECK-LABEL: @test_pmxvi8ger4pp_i1_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
1631 subroutine test_pmxvi8ger4spp_u1_def()
1632 use, intrinsic :: mma
1634 vector(unsigned(1)) vu10
, vu11
1636 call mma_pmxvi8ger4spp(cq
, vu10
, vu11
, 7, 7, 2)
1637 end subroutine test_pmxvi8ger4spp_u1_def
!CHECK-LABEL: @test_pmxvi8ger4spp_u1_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
1649 subroutine test_pmxvi8ger4spp_u1_non_def()
1650 use, intrinsic :: mma
1652 vector(unsigned(1)) vu10
, vu11
1654 call mma_pmxvi8ger4spp(cq
, vu10
, vu11
, 7_2, 7_1, 2_8)
1655 end subroutine test_pmxvi8ger4spp_u1_non_def
!CHECK-LABEL: @test_pmxvi8ger4spp_u1_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
1667 subroutine test_pmxvi8ger4spp_i1_def()
1668 use, intrinsic :: mma
1670 vector(integer(1)) vi10
, vi11
1672 call mma_pmxvi8ger4spp(cq
, vi10
, vi11
, 7, 7, 2)
1673 end subroutine test_pmxvi8ger4spp_i1_def
!CHECK-LABEL: @test_pmxvi8ger4spp_i1_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64
1685 subroutine test_pmxvi8ger4spp_i1_non_def()
1686 use, intrinsic :: mma
1688 vector(integer(1)) vi10
, vi11
1690 call mma_pmxvi8ger4spp(cq
, vi10
, vi11
, 7_2, 7_1, 2_8)
1691 end subroutine test_pmxvi8ger4spp_i1_non_def
!CHECK-LABEL: @test_pmxvi8ger4spp_i1_non_def_
!CHECK: %1 = alloca <512 x i1>, i64 1, align 64
!CHECK: %2 = alloca <16 x i8>, i64 1, align 16
!CHECK: %3 = alloca <16 x i8>, i64 1, align 16
!CHECK: %4 = load <16 x i8>, ptr %2, align 16
!CHECK: %5 = load <16 x i8>, ptr %3, align 16
!CHECK: %6 = load <512 x i1>, ptr %1, align 64
!CHECK: %7 = call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %6, <16 x i8> %4, <16 x i8> %5, i32 7, i32 7, i32 2)
!CHECK: store <512 x i1> %7, ptr %1, align 64