# RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx950 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s
# CHECK: Instructions: 133
# CHECK: Total Cycles: 1101
# CHECK: Total uOps: 133
;; v_mfma_* with f16 / bf16 / i8 inputs (16x16x32, 32x32x16, 16x16x64 and
;; 32x32x32 shapes). Each mnemonic is issued with both a[...] and v[...]
;; register operands, some with a blgp modifier.
;; Each statement is kept on a single line: a newline terminates a statement,
;; so an operand list must not be broken across lines.
v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[0:3], a[4:7]
v_mfma_f32_32x32x16_f16 v[0:15], v[0:3], v[0:3], v[0:15]
v_mfma_f32_32x32x16_f16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2
v_mfma_f32_32x32x16_bf16 v[0:15], v[0:3], v[0:3], v[0:15]
v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2
v_mfma_i32_16x16x64_i8 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[0:3], a[4:7]
v_mfma_i32_32x32x32_i8 v[0:15], v[0:3], v[0:3], v[0:15]
v_mfma_i32_32x32x32_i8 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2
v_mfma_f32_16x16x32_bf16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[0:3], a[4:7]

;; Scale load; the expected table reports it in a different resource column
;; than the matrix instructions above.
v_mfma_ld_scale_b32 v0, v0
;; v_mfma_f32_16x16x128_f8f6f4: sweep of the blgp and cbsz modifiers, alone and
;; combined, together with the source register-range width used for each value
;; (v[4:11], v[4:9] or v[4:7]).
;; FIXME: should have different cycle count depending on whether either matrix is f8
v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3]
v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:1
v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] blgp:2
v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] blgp:3
v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:7], v[0:3] blgp:4
v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:1
v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:2
v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:3
v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:7], v[4:11], v[0:3] cbsz:4
v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:2 blgp:1
v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] cbsz:1 blgp:2
;; v_mfma_f32_32x32x64_f8f6f4: sweep of the blgp and cbsz modifiers, together
;; with the source register-range width used for each value
;; (v[4:11], v[4:9] or v[4:7]).
;; FIXME: should have different cycle count depending on whether either matrix is f8
v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15]
v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1
v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:9], v[0:15] blgp:2
v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:9], v[0:15] blgp:3
v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:7], v[0:15] blgp:4
v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:1
v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:2
v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:3
v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:7], v[4:11], v[0:15] cbsz:4
v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:2
v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1
;; v_mfma_scale_f32_*_f8f6f4: the scaled forms take two extra register
;; operands (v5, v5 here) after the four matrix operands. Issued with no
;; modifier, with blgp only, and with cbsz and blgp combined.
;; FIXME: should have different cycle count depending on whether either matrix is f8
v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5
v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 blgp:1
v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:9], v[0:3], v5, v5 cbsz:2 blgp:2

v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5
v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15], v5, v5 cbsz:2 blgp:1
v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:9], v[0:15], v5, v5 cbsz:2 blgp:2
;; v_smfmac_* with f16 / bf16 / i8 / bf8 / fp8 inputs (16x16x64, 32x32x32,
;; 16x16x128 and 32x32x64 shapes). All use the same operand pattern:
;; a[2:5], v[4:11], v3 with cbsz:3 abid:1; only the destination width varies.
;; TODO: These results are wrong
v_smfmac_f32_16x16x64_f16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
v_smfmac_f32_32x32x32_f16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
v_smfmac_f32_16x16x64_bf16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
v_smfmac_f32_32x32x32_bf16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
v_smfmac_i32_16x16x128_i8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
v_smfmac_i32_32x32x64_i8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1

v_smfmac_f32_16x16x128_bf8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
v_smfmac_f32_16x16x128_bf8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
v_smfmac_f32_16x16x128_fp8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
v_smfmac_f32_16x16x128_fp8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1

v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
;; v_mfma_* with f32 and f64 inputs. Each mnemonic is issued once with a[...]
;; destination/accumulator registers and once with v[...] registers.
v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]

v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]
v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]

;; The f64 forms are reported in a different resource column than the f32
;; forms in the expected table.
v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3]
v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3]

v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7]
v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7]
;; v_mfma_* with f16 / bf16 / i8 inputs taking 64-bit (register-pair) sources.
;; Each mnemonic is issued with v[...] and then a[...] destination/accumulator
;; registers.
v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3]
v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3]

v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15]
v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15]

v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3]
v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3]

v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15]
v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15]

v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3]
v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3]

v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15]
v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15]
;; v_mfma_* "_16b" / "_4b" / "_2b" mnemonics with f16 and bf16 inputs.
;; Each is issued with v[...] and then a[...] destination/accumulator
;; registers; destinations span 4, 16 or 32 registers.
v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5]
v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5]

v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33]
v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33]

v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65]
v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65]

v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5]
v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5]

v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33]
v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33]

v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65]
v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65]
;; v_mfma_* with f32 and i8 inputs that take single-register sources (v0, v1).
;; Each mnemonic is issued with v[...] and then a[...] destination/accumulator
;; registers; the 32x32x1_2b pair also carries blgp:7.
v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5]
v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5]

v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33]
v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33]

v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]

v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7
v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7

v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]
v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]

v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5]
v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5]

v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33]
v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33]

v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65]
v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65]
;; v_smfmac_* with f16 / bf16 / i8 inputs. Each mnemonic is issued twice:
;; once with a v[...] destination plus cbsz:3 abid:1, and once with an a[...]
;; destination and no modifiers. The last operand uses a distinct register
;; (v0..v11) per instruction.
v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1
v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1

v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1
v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3

v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1
v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5

v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1
v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9

v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1
v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11
;; v_mfma_f32_* with bf8 / fp8 inputs, all four type pairings.
;; The 16x16x32 forms are issued with both v[...] and a[...]
;; destination/accumulator registers; the 32x32x16 forms with v[...] only.
v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3]
v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3]

v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3]
v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3]

v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3]
v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3]

v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3]
v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3]

v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15]
v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15]
v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15]
v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15]
;; v_smfmac_f32_* with bf8 / fp8 inputs, all four type pairings, each with
;; cbsz:3 abid:1. The 16x16x64 forms take a[2:3] as the first source; the
;; 32x32x32 forms take v[2:3].
v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1
v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1
v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1
v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1

v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1
v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1
v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1
v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1
# CHECK: [0] [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[0:3], a[4:7]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_f16 v[0:15], v[0:3], v[0:3], v[0:15]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_f16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf16 v[0:15], v[0:3], v[0:3], v[0:15]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2
# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x64_i8 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[0:3], a[4:7]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x32_i8 v[0:15], v[0:3], v[0:3], v[0:15]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x32_i8 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[0:3], a[4:7]
# CHECK-NEXT: - - - - 1.00 - - v_mfma_ld_scale_b32 v0, v0
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:1
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] blgp:2
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] blgp:3
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:7], v[0:3] blgp:4
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:1
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:2
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:3
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:7], v[4:11], v[0:3] cbsz:4
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:2 blgp:1
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] cbsz:1 blgp:2
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:9], v[0:15] blgp:2
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:9], v[0:15] blgp:3
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:7], v[0:15] blgp:4
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:1
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:2
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:3
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:7], v[4:11], v[0:15] cbsz:4
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:2
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1
# CHECK-NEXT: - - - - - - 4.00 v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 op_sel_hi:[0,0,0]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 op_sel_hi:[0,0,0] blgp:1
# CHECK-NEXT: - - - - - - 4.00 v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:9], v[0:3], v5, v5 op_sel_hi:[0,0,0] cbsz:2 blgp:2
# CHECK-NEXT: - - - - - - 8.00 v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 op_sel_hi:[0,0,0]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15], v5, v5 op_sel_hi:[0,0,0] cbsz:2 blgp:1
# CHECK-NEXT: - - - - - - 8.00 v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:9], v[0:15], v5, v5 op_sel_hi:[0,0,0] cbsz:2 blgp:2
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_f16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_f16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_bf16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_bf16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x128_i8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x64_i8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x128_bf8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x128_bf8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x128_fp8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x128_fp8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]
# CHECK-NEXT: - - - - 4.00 - - v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3]
# CHECK-NEXT: - - - - 4.00 - - v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3]
# CHECK-NEXT: - - - - 16.00 - - v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7]
# CHECK-NEXT: - - - - 16.00 - - v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5]
# CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65]
# CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65]
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3]
# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15]
# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15]
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1
# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1