; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX12
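; Instruction-selection tests for the GFX12 WMMA and sparse SWMMAC matrix
; intrinsics. Each function calls a single intrinsic and stores the result so
; the selected v_wmma_*/v_swmmac_* instruction and its register operands can
; be checked.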
define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<8 x half> %A, <8 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
; GFX12-LABEL: test_wmma_f32_16x16x16_f16:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_wmma_f32_16x16x16_f16 v[8:15], v[0:3], v[4:7], v[8:15]
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
; GFX12-NEXT: global_store_b128 v[16:17], v[8:11], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> %A, <8 x half> %B, <8 x float> %C)
  store <8 x float> %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<8 x i16> %A, <8 x i16> %B, <8 x float> %C, ptr addrspace(1) %out) {
; GFX12-LABEL: test_wmma_f32_16x16x16_bf16:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_wmma_f32_16x16x16_bf16 v[8:15], v[0:3], v[4:7], v[8:15]
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[16:17], v[12:15], off offset:16
; GFX12-NEXT: global_store_b128 v[16:17], v[8:11], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16> %A, <8 x i16> %B, <8 x float> %C)
  store <8 x float> %res, ptr addrspace(1) %out
  ret void
}

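; With a 16-bit destination type (f16/bf16) the 16x16 result packs into four
; VGPRs, so a single global_store_b128 suffices instead of the clause of two
; stores used for the f32-accumulator cases above.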
define amdgpu_ps void @test_wmma_f16_16x16x16_f16(<8 x half> %A, <8 x half> %B, <8 x half> %C, ptr addrspace(1) %out) {
; GFX12-LABEL: test_wmma_f16_16x16x16_f16:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_wmma_f16_16x16x16_f16 v[8:11], v[0:3], v[4:7], v[8:11]
; GFX12-NEXT: global_store_b128 v[12:13], v[8:11], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %C, i1 0)
  store <8 x half> %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr addrspace(1) %out) {
; GFX12-LABEL: test_wmma_bf16_16x16x16_bf16:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_wmma_bf16_16x16x16_bf16 v[8:11], v[0:3], v[4:7], v[8:11]
; GFX12-NEXT: global_store_b128 v[12:13], v[8:11], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i1 0)
  store <8 x i16> %res, ptr addrspace(1) %out
  ret void
}

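; Integer (iu8/iu4) variants take packed i32 data for A and B; the i1
; immediate operands are the signedness flags for A and B plus a final clamp
; bit (all zero in these tests).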
define amdgpu_ps void @test_wmma_i32_16x16x16_iu8(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
; GFX12-LABEL: test_wmma_i32_16x16x16_iu8:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_wmma_i32_16x16x16_iu8 v[4:11], v[0:1], v[2:3], v[4:11]
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
; GFX12-NEXT: global_store_b128 v[12:13], v[4:7], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 0)
  store <8 x i32> %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @test_wmma_i32_16x16x16_iu4(i32 %A, i32 %B, <8 x i32> %C, ptr addrspace(1) %out) {
; GFX12-LABEL: test_wmma_i32_16x16x16_iu4:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_wmma_i32_16x16x16_iu4 v[2:9], v0, v1, v[2:9]
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[10:11], v[6:9], off offset:16
; GFX12-NEXT: global_store_b128 v[10:11], v[2:5], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 0, i32 %A, i1 0, i32 %B, <8 x i32> %C, i1 0)
  store <8 x i32> %res, ptr addrspace(1) %out
  ret void
}

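; fp8/bf8 variants: A and B are packed into <2 x i32>, and each of the four
; source-format combinations selects a distinct v_wmma opcode.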
define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_fp8(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_fp8:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_wmma_f32_16x16x16_fp8_fp8 v[4:11], v[0:1], v[2:3], v[4:11]
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
; GFX12-NEXT: global_store_b128 v[12:13], v[4:7], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %C)
  store <8 x float> %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_fp8(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_fp8:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_wmma_f32_16x16x16_bf8_fp8 v[4:11], v[0:1], v[2:3], v[4:11]
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
; GFX12-NEXT: global_store_b128 v[12:13], v[4:7], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %C)
  store <8 x float> %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @test_wmma_f32_16x16x16_fp8_bf8(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
; GFX12-LABEL: test_wmma_f32_16x16x16_fp8_bf8:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_wmma_f32_16x16x16_fp8_bf8 v[4:11], v[0:1], v[2:3], v[4:11]
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
; GFX12-NEXT: global_store_b128 v[12:13], v[4:7], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %C)
  store <8 x float> %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @test_wmma_f32_16x16x16_bf8_bf8(<2 x i32> %A, <2 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
; GFX12-LABEL: test_wmma_f32_16x16x16_bf8_bf8:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_wmma_f32_16x16x16_bf8_bf8 v[4:11], v[0:1], v[2:3], v[4:11]
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
; GFX12-NEXT: global_store_b128 v[12:13], v[4:7], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32> %A, <2 x i32> %B, <8 x float> %C)
  store <8 x float> %res, ptr addrspace(1) %out
  ret void
}

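; iu4 with K=32: compared to the 16x16x16 iu4 test above, A and B carry twice
; the data per lane (<2 x i32> instead of i32).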
define amdgpu_ps void @test_wmma_i32_16x16x32_iu4(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
; GFX12-LABEL: test_wmma_i32_16x16x32_iu4:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_wmma_i32_16x16x32_iu4 v[4:11], v[0:1], v[2:3], v[4:11]
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[12:13], v[8:11], off offset:16
; GFX12-NEXT: global_store_b128 v[12:13], v[4:7], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 0)
  store <8 x i32> %res, ptr addrspace(1) %out
  ret void
}

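; SWMMAC (sparse) variants: A is the compressed operand, B carries twice as
; much data as A, and the extra Index operand (a single VGPR, e.g. v20 below)
; supplies the sparsity indices for A.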
define amdgpu_ps void @test_swmmac_f32_16x16x32_f16(<8 x half> %A, <16 x half> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
; GFX12-LABEL: test_swmmac_f32_16x16x32_f16:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_swmmac_f32_16x16x32_f16 v[12:19], v[0:3], v[4:11], v20
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[21:22], v[16:19], off offset:16
; GFX12-NEXT: global_store_b128 v[21:22], v[12:15], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i16(<8 x half> %A, <16 x half> %B, <8 x float> %C, i16 %Index)
  store <8 x float> %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @test_swmmac_f32_16x16x32_bf16(<8 x i16> %A, <16 x i16> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
; GFX12-LABEL: test_swmmac_f32_16x16x32_bf16:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf16 v[12:19], v[0:3], v[4:11], v20
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[21:22], v[16:19], off offset:16
; GFX12-NEXT: global_store_b128 v[21:22], v[12:15], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.i16(<8 x i16> %A, <16 x i16> %B, <8 x float> %C, i16 %Index)
  store <8 x float> %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @test_swmmac_f16_16x16x32_f16(<8 x half> %A, <16 x half> %B, <8 x half> %C, i16 %Index, ptr addrspace(1) %out) {
; GFX12-LABEL: test_swmmac_f16_16x16x32_f16:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_swmmac_f16_16x16x32_f16 v[12:15], v[0:3], v[4:11], v16
; GFX12-NEXT: global_store_b128 v[17:18], v[12:15], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i16(<8 x half> %A, <16 x half> %B, <8 x half> %C, i16 %Index)
  store <8 x half> %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @test_swmmac_bf16_16x16x32_bf16(<8 x i16> %A, <16 x i16> %B, <8 x i16> %C, i16 %Index, ptr addrspace(1) %out) {
; GFX12-LABEL: test_swmmac_bf16_16x16x32_bf16:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_swmmac_bf16_16x16x32_bf16 v[12:15], v[0:3], v[4:11], v16
; GFX12-NEXT: global_store_b128 v[17:18], v[12:15], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.i16(<8 x i16> %A, <16 x i16> %B, <8 x i16> %C, i16 %Index)
  store <8 x i16> %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @test_swmmac_i32_16x16x32_iu8(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
; GFX12-LABEL: test_swmmac_i32_16x16x32_iu8:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu8 v[6:13], v[0:1], v[2:5], v14
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[15:16], v[10:13], off offset:16
; GFX12-NEXT: global_store_b128 v[15:16], v[6:9], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i16(i1 0, <2 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i16 %Index, i1 0)
  store <8 x i32> %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @test_swmmac_i32_16x16x32_iu4(i32 %A, <2 x i32> %B, <8 x i32> %C, i16 %Index, ptr addrspace(1) %out) {
; GFX12-LABEL: test_swmmac_i32_16x16x32_iu4:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_swmmac_i32_16x16x32_iu4 v[3:10], v0, v[1:2], v11
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[12:13], v[7:10], off offset:16
; GFX12-NEXT: global_store_b128 v[12:13], v[3:6], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i16(i1 0, i32 %A, i1 0, <2 x i32> %B, <8 x i32> %C, i16 %Index, i1 0)
  store <8 x i32> %res, ptr addrspace(1) %out
  ret void
}

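; The 16x16x64 iu4 variant doubles the A/B operand widths and takes an i32
; index rather than the i16 used by the K=32 forms.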
define amdgpu_ps void @test_swmmac_i32_16x16x64_iu4(<2 x i32> %A, <4 x i32> %B, <8 x i32> %C, i32 %Index, ptr addrspace(1) %out) {
; GFX12-LABEL: test_swmmac_i32_16x16x64_iu4:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_swmmac_i32_16x16x64_iu4 v[6:13], v[0:1], v[2:5], v14
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[15:16], v[10:13], off offset:16
; GFX12-NEXT: global_store_b128 v[15:16], v[6:9], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 0, <2 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i32 %Index, i1 0)
  store <8 x i32> %res, ptr addrspace(1) %out
  ret void
}

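; SWMMAC fp8/bf8 combinations, analogous to the dense WMMA fp8/bf8 tests
; above but with <4 x i32> B data and an i16 index.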
define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_fp8(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_fp8:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_fp8 v[6:13], v[0:1], v[2:5], v14
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[15:16], v[10:13], off offset:16
; GFX12-NEXT: global_store_b128 v[15:16], v[6:9], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index)
  store <8 x float> %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @test_swmmac_f32_16x16x32_fp8_bf8(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
; GFX12-LABEL: test_swmmac_f32_16x16x32_fp8_bf8:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_swmmac_f32_16x16x32_fp8_bf8 v[6:13], v[0:1], v[2:5], v14
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[15:16], v[10:13], off offset:16
; GFX12-NEXT: global_store_b128 v[15:16], v[6:9], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index)
  store <8 x float> %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_fp8(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_fp8:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_fp8 v[6:13], v[0:1], v[2:5], v14
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[15:16], v[10:13], off offset:16
; GFX12-NEXT: global_store_b128 v[15:16], v[6:9], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index)
  store <8 x float> %res, ptr addrspace(1) %out
  ret void
}

define amdgpu_ps void @test_swmmac_f32_16x16x32_bf8_bf8(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index, ptr addrspace(1) %out) {
; GFX12-LABEL: test_swmmac_f32_16x16x32_bf8_bf8:
; GFX12: ; %bb.0: ; %bb
; GFX12-NEXT: v_swmmac_f32_16x16x32_bf8_bf8 v[6:13], v[0:1], v[2:5], v14
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: global_store_b128 v[15:16], v[10:13], off offset:16
; GFX12-NEXT: global_store_b128 v[15:16], v[6:9], off
; GFX12-NEXT: s_endpgm
bb:
  %res = call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32> %A, <4 x i32> %B, <8 x float> %C, i16 %Index)
  store <8 x float> %res, ptr addrspace(1) %out
  ret void
}

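; Intrinsic declarations for the calls above.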
declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half>, <8 x half>, <8 x float>)
declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16>, <8 x i16>, <8 x float>)
declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half>, <8 x half>, <8 x half>, i1 immarg)
declare <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16(<8 x i16>, <8 x i16>, <8 x i16>, i1 immarg)
declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <8 x i32>, i1 immarg)
declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 immarg, i32, i1 immarg, i32, <8 x i32>, i1 immarg)
declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <8 x i32>, i1 immarg)
declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i16(<8 x half>, <16 x half>, <8 x float>, i16)
declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.i16(<8 x i16>, <16 x i16>, <8 x float>, i16)
declare <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i16(<8 x half>, <16 x half>, <8 x half>, i16)
declare <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.i16(<8 x i16>, <16 x i16>, <8 x i16>, i16)
declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i16(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i16 %Index, i1 immarg)
declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <8 x i32>, i16 %Index, i1 immarg)
declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i32 %Index, i1 immarg)
declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)