// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -DMFMA_GFX908_TESTS -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX908
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -DMFMA_GFX90A_TESTS -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX90A
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -DMFMA_GFX940_TESTS -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX940
// Enable the fp64 extension; the f64 tests in this file take double operands.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// Vector aliases used as operand and accumulator types by the tests in this file.
typedef float  v2f   __attribute__((ext_vector_type(2)));
typedef float  v4f   __attribute__((ext_vector_type(4)));
typedef float  v16f  __attribute__((ext_vector_type(16)));
typedef float  v32f  __attribute__((ext_vector_type(32)));
typedef half   v4h   __attribute__((ext_vector_type(4)));
typedef half   v8h   __attribute__((ext_vector_type(8)));
typedef half   v16h  __attribute__((ext_vector_type(16)));
typedef half   v32h  __attribute__((ext_vector_type(32)));
typedef int    v2i   __attribute__((ext_vector_type(2)));
typedef int    v4i   __attribute__((ext_vector_type(4)));
typedef int    v16i  __attribute__((ext_vector_type(16)));
typedef int    v32i  __attribute__((ext_vector_type(32)));
typedef short  v2s   __attribute__((ext_vector_type(2)));
typedef short  v4s   __attribute__((ext_vector_type(4)));
typedef short  v8s   __attribute__((ext_vector_type(8)));
typedef short  v16s  __attribute__((ext_vector_type(16)));
typedef short  v32s  __attribute__((ext_vector_type(32)));
typedef double v4d   __attribute__((ext_vector_type(4)));
// Tests in this section are compiled only when MFMA_GFX908_TESTS is defined.
#ifdef MFMA_GFX908_TESTS
// __builtin_amdgcn_mfma_f32_32x32x1f32 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_f32_32x32x1f32
// CHECK-GFX908: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_32x32x1f32(global v32f* out, float a, float b, v32f c)
{
  *out = __builtin_amdgcn_mfma_f32_32x32x1f32(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_16x16x1f32 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_f32_16x16x1f32
// CHECK-GFX908: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %a, float %b, <16 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_16x16x1f32(global v16f* out, float a, float b, v16f c)
{
  *out = __builtin_amdgcn_mfma_f32_16x16x1f32(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_4x4x1f32 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_f32_4x4x1f32
// CHECK-GFX908: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %a, float %b, <4 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_4x4x1f32(global v4f* out, float a, float b, v4f c)
{
  *out = __builtin_amdgcn_mfma_f32_4x4x1f32(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_32x32x2f32 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_f32_32x32x2f32
// CHECK-GFX908: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float %a, float %b, <16 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_32x32x2f32(global v16f* out, float a, float b, v16f c)
{
  *out = __builtin_amdgcn_mfma_f32_32x32x2f32(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_16x16x4f32 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_f32_16x16x4f32
// CHECK-GFX908: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float %a, float %b, <4 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_16x16x4f32(global v4f* out, float a, float b, v4f c)
{
  *out = __builtin_amdgcn_mfma_f32_16x16x4f32(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_32x32x4f16 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_f32_32x32x4f16
// CHECK-GFX908: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %a, <4 x half> %b, <32 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_32x32x4f16(global v32f* out, v4h a, v4h b, v32f c)
{
  *out = __builtin_amdgcn_mfma_f32_32x32x4f16(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_16x16x4f16 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_f32_16x16x4f16
// CHECK-GFX908: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %a, <4 x half> %b, <16 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_16x16x4f16(global v16f* out, v4h a, v4h b, v16f c)
{
  *out = __builtin_amdgcn_mfma_f32_16x16x4f16(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_4x4x4f16 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_f32_4x4x4f16
// CHECK-GFX908: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %a, <4 x half> %b, <4 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_4x4x4f16(global v4f* out, v4h a, v4h b, v4f c)
{
  *out = __builtin_amdgcn_mfma_f32_4x4x4f16(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_32x32x8f16 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_f32_32x32x8f16
// CHECK-GFX908: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %a, <4 x half> %b, <16 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_32x32x8f16(global v16f* out, v4h a, v4h b, v16f c)
{
  *out = __builtin_amdgcn_mfma_f32_32x32x8f16(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_16x16x16f16 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_f32_16x16x16f16
// CHECK-GFX908: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %a, <4 x half> %b, <4 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_16x16x16f16(global v4f* out, v4h a, v4h b, v4f c)
{
  *out = __builtin_amdgcn_mfma_f32_16x16x16f16(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_i32_32x32x4i8 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_i32_32x32x4i8
// CHECK-GFX908: call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 %a, i32 %b, <32 x i32> %c, i32 0, i32 0, i32 0)
void test_mfma_i32_32x32x4i8(global v32i* out, int a, int b, v32i c)
{
  *out = __builtin_amdgcn_mfma_i32_32x32x4i8(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_i32_16x16x4i8 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_i32_16x16x4i8
// CHECK-GFX908: call <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32 %a, i32 %b, <16 x i32> %c, i32 0, i32 0, i32 0)
void test_mfma_i32_16x16x4i8(global v16i* out, int a, int b, v16i c)
{
  *out = __builtin_amdgcn_mfma_i32_16x16x4i8(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_i32_4x4x4i8 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_i32_4x4x4i8
// CHECK-GFX908: call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 %a, i32 %b, <4 x i32> %c, i32 0, i32 0, i32 0)
void test_mfma_i32_4x4x4i8(global v4i* out, int a, int b, v4i c)
{
  *out = __builtin_amdgcn_mfma_i32_4x4x4i8(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_i32_32x32x8i8 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_i32_32x32x8i8
// CHECK-GFX908: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 %a, i32 %b, <16 x i32> %c, i32 0, i32 0, i32 0)
void test_mfma_i32_32x32x8i8(global v16i* out, int a, int b, v16i c)
{
  *out = __builtin_amdgcn_mfma_i32_32x32x8i8(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_i32_16x16x16i8 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_i32_16x16x16i8
// CHECK-GFX908: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 %a, i32 %b, <4 x i32> %c, i32 0, i32 0, i32 0)
void test_mfma_i32_16x16x16i8(global v4i* out, int a, int b, v4i c)
{
  *out = __builtin_amdgcn_mfma_i32_16x16x16i8(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_32x32x2bf16 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_f32_32x32x2bf16
// CHECK-GFX908: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %a, <2 x i16> %b, <32 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_32x32x2bf16(global v32f* out, v2s a, v2s b, v32f c)
{
  *out = __builtin_amdgcn_mfma_f32_32x32x2bf16(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_16x16x2bf16 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_f32_16x16x2bf16
// CHECK-GFX908: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> %a, <2 x i16> %b, <16 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_16x16x2bf16(global v16f* out, v2s a, v2s b, v16f c)
{
  *out = __builtin_amdgcn_mfma_f32_16x16x2bf16(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_4x4x2bf16 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_f32_4x4x2bf16
// CHECK-GFX908: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> %a, <2 x i16> %b, <4 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_4x4x2bf16(global v4f* out, v2s a, v2s b, v4f c)
{
  *out = __builtin_amdgcn_mfma_f32_4x4x2bf16(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_32x32x4bf16 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_f32_32x32x4bf16
// CHECK-GFX908: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> %a, <2 x i16> %b, <16 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_32x32x4bf16(global v16f* out, v2s a, v2s b, v16f c)
{
  *out = __builtin_amdgcn_mfma_f32_32x32x4bf16(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_16x16x8bf16 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX908-LABEL: @test_mfma_f32_16x16x8bf16
// CHECK-GFX908: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16> %a, <2 x i16> %b, <4 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_16x16x8bf16(global v4f* out, v2s a, v2s b, v4f c)
{
  *out = __builtin_amdgcn_mfma_f32_16x16x8bf16(a, b, c, 0, 0, 0);
}
#endif // MFMA_GFX908_TESTS

// Tests in this section are compiled only when MFMA_GFX90A_TESTS is defined.
#ifdef MFMA_GFX90A_TESTS
// __builtin_amdgcn_mfma_f32_32x32x4bf16_1k must be emitted as a call to the
// ".1k" intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX90A-LABEL: @test_mfma_f32_32x32x4bf16_1k
// CHECK-GFX90A: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <32 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_32x32x4bf16_1k(global v32f* out, v4s a, v4s b, v32f c)
{
  *out = __builtin_amdgcn_mfma_f32_32x32x4bf16_1k(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_16x16x4bf16_1k must be emitted as a call to the
// ".1k" intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX90A-LABEL: @test_mfma_f32_16x16x4bf16_1k
// CHECK-GFX90A: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <16 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_16x16x4bf16_1k(global v16f* out, v4s a, v4s b, v16f c)
{
  *out = __builtin_amdgcn_mfma_f32_16x16x4bf16_1k(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_4x4x4bf16_1k must be emitted as a call to the
// ".1k" intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX90A-LABEL: @test_mfma_f32_4x4x4bf16_1k
// CHECK-GFX90A: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <4 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_4x4x4bf16_1k(global v4f* out, v4s a, v4s b, v4f c)
{
  *out = __builtin_amdgcn_mfma_f32_4x4x4bf16_1k(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_32x32x8bf16_1k must be emitted as a call to the
// ".1k" intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX90A-LABEL: @test_mfma_f32_32x32x8bf16_1k
// CHECK-GFX90A: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8bf16.1k(<4 x i16> %a, <4 x i16> %b, <16 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_32x32x8bf16_1k(global v16f* out, v4s a, v4s b, v16f c)
{
  *out = __builtin_amdgcn_mfma_f32_32x32x8bf16_1k(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_16x16x16bf16_1k must be emitted as a call to the
// ".1k" intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX90A-LABEL: @test_mfma_f32_16x16x16bf16_1k
// CHECK-GFX90A: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16bf16.1k(<4 x i16> %a, <4 x i16> %b, <4 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_16x16x16bf16_1k(global v4f* out, v4s a, v4s b, v4f c)
{
  *out = __builtin_amdgcn_mfma_f32_16x16x16bf16_1k(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f64_16x16x4f64 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX90A-LABEL: @test_mfma_f64_16x16x4f64
// CHECK-GFX90A: call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> %c, i32 0, i32 0, i32 0)
void test_mfma_f64_16x16x4f64(global v4d* out, double a, double b, v4d c)
{
  *out = __builtin_amdgcn_mfma_f64_16x16x4f64(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f64_4x4x4f64 takes and returns scalar doubles; it must be
// emitted as a call to the matching intrinsic with a, b, c forwarded and the
// three trailing immediates all 0.
// CHECK-GFX90A-LABEL: @test_mfma_f64_4x4x4f64
// CHECK-GFX90A: call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %a, double %b, double %c, i32 0, i32 0, i32 0)
void test_mfma_f64_4x4x4f64(global double* out, double a, double b, double c)
{
  *out = __builtin_amdgcn_mfma_f64_4x4x4f64(a, b, c, 0, 0, 0);
}
#endif // MFMA_GFX90A_TESTS

// Tests in this section are compiled only when MFMA_GFX940_TESTS is defined.
#ifdef MFMA_GFX940_TESTS
// __builtin_amdgcn_mfma_i32_16x16x32_i8 must be emitted as a call to the matching
// intrinsic with a, b (as i64) and c forwarded and the three trailing immediates all 0.
// CHECK-GFX940-LABEL: @test_mfma_i32_16x16x32_i8
// CHECK-GFX940: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64 %a, i64 %b, <4 x i32> %c, i32 0, i32 0, i32 0)
void test_mfma_i32_16x16x32_i8(global v4i* out, long a, long b, v4i c)
{
  *out = __builtin_amdgcn_mfma_i32_16x16x32_i8(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_i32_32x32x16_i8 must be emitted as a call to the matching
// intrinsic with a, b (as i64) and c forwarded and the three trailing immediates all 0.
// CHECK-GFX940-LABEL: @test_mfma_i32_32x32x16_i8
// CHECK-GFX940: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x16.i8(i64 %a, i64 %b, <16 x i32> %c, i32 0, i32 0, i32 0)
void test_mfma_i32_32x32x16_i8(global v16i* out, long a, long b, v16i c)
{
  *out = __builtin_amdgcn_mfma_i32_32x32x16_i8(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_16x16x8_xf32 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX940-LABEL: @test_mfma_f32_16x16x8_xf32
// CHECK-GFX940: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> %a, <2 x float> %b, <4 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_16x16x8_xf32(global v4f* out, v2f a, v2f b, v4f c)
{
  *out = __builtin_amdgcn_mfma_f32_16x16x8_xf32(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_32x32x4_xf32 must be emitted as a call to the matching
// intrinsic with a, b, c forwarded and the three trailing immediates all 0.
// CHECK-GFX940-LABEL: @test_mfma_f32_32x32x4_xf32
// CHECK-GFX940: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> %a, <2 x float> %b, <16 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_32x32x4_xf32(global v16f* out, v2f a, v2f b, v16f c)
{
  *out = __builtin_amdgcn_mfma_f32_32x32x4_xf32(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_16x16x32_bf8_bf8 must be emitted as a call to the matching
// intrinsic with a, b (as i64) and c forwarded and the three trailing immediates all 0.
// CHECK-GFX940-LABEL: @test_mfma_f32_16x16x32_bf8_bf8
// CHECK-GFX940: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.bf8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_16x16x32_bf8_bf8(global v4f* out, long a, long b, v4f c)
{
  *out = __builtin_amdgcn_mfma_f32_16x16x32_bf8_bf8(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_16x16x32_bf8_fp8 must be emitted as a call to the matching
// intrinsic with a, b (as i64) and c forwarded and the three trailing immediates all 0.
// CHECK-GFX940-LABEL: @test_mfma_f32_16x16x32_bf8_fp8
// CHECK-GFX940: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.fp8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_16x16x32_bf8_fp8(global v4f* out, long a, long b, v4f c)
{
  *out = __builtin_amdgcn_mfma_f32_16x16x32_bf8_fp8(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_16x16x32_fp8_bf8 must be emitted as a call to the matching
// intrinsic with a, b (as i64) and c forwarded and the three trailing immediates all 0.
// CHECK-GFX940-LABEL: @test_mfma_f32_16x16x32_fp8_bf8
// CHECK-GFX940: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.bf8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_16x16x32_fp8_bf8(global v4f* out, long a, long b, v4f c)
{
  *out = __builtin_amdgcn_mfma_f32_16x16x32_fp8_bf8(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8 must be emitted as a call to the matching
// intrinsic with a, b (as i64) and c forwarded and the three trailing immediates all 0.
// CHECK-GFX940-LABEL: @test_mfma_f32_16x16x32_fp8_fp8
// CHECK-GFX940: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.fp8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_16x16x32_fp8_fp8(global v4f* out, long a, long b, v4f c)
{
  *out = __builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_32x32x16_bf8_bf8 must be emitted as a call to the matching
// intrinsic with a, b (as i64) and c forwarded and the three trailing immediates all 0.
// CHECK-GFX940-LABEL: @test_mfma_f32_32x32x16_bf8_bf8
// CHECK-GFX940: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.bf8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_32x32x16_bf8_bf8(global v16f* out, long a, long b, v16f c)
{
  *out = __builtin_amdgcn_mfma_f32_32x32x16_bf8_bf8(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_32x32x16_bf8_fp8 must be emitted as a call to the matching
// intrinsic with a, b (as i64) and c forwarded and the three trailing immediates all 0.
// CHECK-GFX940-LABEL: @test_mfma_f32_32x32x16_bf8_fp8
// CHECK-GFX940: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.fp8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_32x32x16_bf8_fp8(global v16f* out, long a, long b, v16f c)
{
  *out = __builtin_amdgcn_mfma_f32_32x32x16_bf8_fp8(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_32x32x16_fp8_bf8 must be emitted as a call to the matching
// intrinsic with a, b (as i64) and c forwarded and the three trailing immediates all 0.
// CHECK-GFX940-LABEL: @test_mfma_f32_32x32x16_fp8_bf8
// CHECK-GFX940: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.bf8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_32x32x16_fp8_bf8(global v16f* out, long a, long b, v16f c)
{
  *out = __builtin_amdgcn_mfma_f32_32x32x16_fp8_bf8(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8 must be emitted as a call to the matching
// intrinsic with a, b (as i64) and c forwarded and the three trailing immediates all 0.
// CHECK-GFX940-LABEL: @test_mfma_f32_32x32x16_fp8_fp8
// CHECK-GFX940: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0)
void test_mfma_f32_32x32x16_fp8_fp8(global v16f* out, long a, long b, v16f c)
{
  *out = __builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8(a, b, c, 0, 0, 0);
}
// __builtin_amdgcn_smfmac_f32_16x16x32_f16 must be emitted as a call to the matching
// intrinsic with a, b, c and the runtime idx forwarded and the two trailing immediates both 0.
// CHECK-GFX940-LABEL: @test_smfmac_f32_16x16x32_f16
// CHECK-GFX940: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.f16(<4 x half> %a, <8 x half> %b, <4 x float> %c, i32 %idx, i32 0, i32 0)
void test_smfmac_f32_16x16x32_f16(global v4f* out, v4h a, v8h b, v4f c, int idx)
{
  *out = __builtin_amdgcn_smfmac_f32_16x16x32_f16(a, b, c, idx, 0, 0);
}
// __builtin_amdgcn_smfmac_f32_32x32x16_f16 must be emitted as a call to the matching
// intrinsic with a, b, c and the runtime idx forwarded and the two trailing immediates both 0.
// CHECK-GFX940-LABEL: @test_smfmac_f32_32x32x16_f16
// CHECK-GFX940: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.f16(<4 x half> %a, <8 x half> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
void test_smfmac_f32_32x32x16_f16(global v16f* out, v4h a, v8h b, v16f c, int idx)
{
  *out = __builtin_amdgcn_smfmac_f32_32x32x16_f16(a, b, c, idx, 0, 0);
}
// __builtin_amdgcn_smfmac_f32_16x16x32_bf16 must be emitted as a call to the matching
// intrinsic with a, b, c and the runtime idx forwarded and the two trailing immediates both 0.
// CHECK-GFX940-LABEL: @test_smfmac_f32_16x16x32_bf16
// CHECK-GFX940: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.bf16(<4 x i16> %a, <8 x i16> %b, <4 x float> %c, i32 %idx, i32 0, i32 0)
void test_smfmac_f32_16x16x32_bf16(global v4f* out, v4s a, v8s b, v4f c, int idx)
{
  *out = __builtin_amdgcn_smfmac_f32_16x16x32_bf16(a, b, c, idx, 0, 0);
}
// __builtin_amdgcn_smfmac_f32_32x32x16_bf16 must be emitted as a call to the matching
// intrinsic with a, b, c and the runtime idx forwarded and the two trailing immediates both 0.
// CHECK-GFX940-LABEL: @test_smfmac_f32_32x32x16_bf16
// CHECK-GFX940: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.bf16(<4 x i16> %a, <8 x i16> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
void test_smfmac_f32_32x32x16_bf16(global v16f* out, v4s a, v8s b, v16f c, int idx)
{
  *out = __builtin_amdgcn_smfmac_f32_32x32x16_bf16(a, b, c, idx, 0, 0);
}
// __builtin_amdgcn_smfmac_i32_16x16x64_i8 must be emitted as a call to the matching
// intrinsic with a, b, c and the runtime idx forwarded and the two trailing immediates both 0.
// CHECK-GFX940-LABEL: @test_smfmac_i32_16x16x64_i8
// CHECK-GFX940: call <4 x i32> @llvm.amdgcn.smfmac.i32.16x16x64.i8(<2 x i32> %a, <4 x i32> %b, <4 x i32> %c, i32 %idx, i32 0, i32 0)
void test_smfmac_i32_16x16x64_i8(global v4i* out, v2i a, v4i b, v4i c, int idx)
{
  *out = __builtin_amdgcn_smfmac_i32_16x16x64_i8(a, b, c, idx, 0, 0);
}
// __builtin_amdgcn_smfmac_i32_32x32x32_i8 must be emitted as a call to the matching
// intrinsic with a, b, c and the runtime idx forwarded and the two trailing immediates both 0.
// CHECK-GFX940-LABEL: @test_smfmac_i32_32x32x32_i8
// CHECK-GFX940: call <16 x i32> @llvm.amdgcn.smfmac.i32.32x32x32.i8(<2 x i32> %a, <4 x i32> %b, <16 x i32> %c, i32 %idx, i32 0, i32 0)
void test_smfmac_i32_32x32x32_i8(global v16i* out, v2i a, v4i b, v16i c, int idx)
{
  *out = __builtin_amdgcn_smfmac_i32_32x32x32_i8(a, b, c, idx, 0, 0);
}
// __builtin_amdgcn_smfmac_f32_16x16x64_bf8_bf8 must be emitted as a call to the matching
// intrinsic with a, b, c and the runtime idx forwarded and the two trailing immediates both 0.
// CHECK-GFX940-LABEL: @test_smfmac_f32_16x16x64_bf8_bf8
// CHECK-GFX940: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf8.bf8(<2 x i32> %a, <4 x i32> %b, <4 x float> %c, i32 %idx, i32 0, i32 0)
void test_smfmac_f32_16x16x64_bf8_bf8(global v4f* out, v2i a, v4i b, v4f c, int idx)
{
  *out = __builtin_amdgcn_smfmac_f32_16x16x64_bf8_bf8(a, b, c, idx, 0, 0);
}
// __builtin_amdgcn_smfmac_f32_16x16x64_bf8_fp8 must be emitted as a call to the matching
// intrinsic with a, b, c and the runtime idx forwarded and the two trailing immediates both 0.
// CHECK-GFX940-LABEL: @test_smfmac_f32_16x16x64_bf8_fp8
// CHECK-GFX940: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf8.fp8(<2 x i32> %a, <4 x i32> %b, <4 x float> %c, i32 %idx, i32 0, i32 0)
void test_smfmac_f32_16x16x64_bf8_fp8(global v4f* out, v2i a, v4i b, v4f c, int idx)
{
  *out = __builtin_amdgcn_smfmac_f32_16x16x64_bf8_fp8(a, b, c, idx, 0, 0);
}
// __builtin_amdgcn_smfmac_f32_16x16x64_fp8_bf8 must be emitted as a call to the matching
// intrinsic with a, b, c and the runtime idx forwarded and the two trailing immediates both 0.
// CHECK-GFX940-LABEL: @test_smfmac_f32_16x16x64_fp8_bf8
// CHECK-GFX940: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.fp8.bf8(<2 x i32> %a, <4 x i32> %b, <4 x float> %c, i32 %idx, i32 0, i32 0)
void test_smfmac_f32_16x16x64_fp8_bf8(global v4f* out, v2i a, v4i b, v4f c, int idx)
{
  *out = __builtin_amdgcn_smfmac_f32_16x16x64_fp8_bf8(a, b, c, idx, 0, 0);
}
// __builtin_amdgcn_smfmac_f32_16x16x64_fp8_fp8 must be emitted as a call to the matching
// intrinsic with a, b, c and the runtime idx forwarded and the two trailing immediates both 0.
// CHECK-GFX940-LABEL: @test_smfmac_f32_16x16x64_fp8_fp8
// CHECK-GFX940: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.fp8.fp8(<2 x i32> %a, <4 x i32> %b, <4 x float> %c, i32 %idx, i32 0, i32 0)
void test_smfmac_f32_16x16x64_fp8_fp8(global v4f* out, v2i a, v4i b, v4f c, int idx)
{
  *out = __builtin_amdgcn_smfmac_f32_16x16x64_fp8_fp8(a, b, c, idx, 0, 0);
}
// __builtin_amdgcn_smfmac_f32_32x32x32_bf8_bf8 must be emitted as a call to the matching
// intrinsic with a, b, c and the runtime idx forwarded and the two trailing immediates both 0.
// CHECK-GFX940-LABEL: @test_smfmac_f32_32x32x32_bf8_bf8
// CHECK-GFX940: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf8.bf8(<2 x i32> %a, <4 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
void test_smfmac_f32_32x32x32_bf8_bf8(global v16f* out, v2i a, v4i b, v16f c, int idx)
{
  *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf8_bf8(a, b, c, idx, 0, 0);
}
// __builtin_amdgcn_smfmac_f32_32x32x32_bf8_fp8 must be emitted as a call to the matching
// intrinsic with a, b, c and the runtime idx forwarded and the two trailing immediates both 0.
// CHECK-GFX940-LABEL: @test_smfmac_f32_32x32x32_bf8_fp8
// CHECK-GFX940: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf8.fp8(<2 x i32> %a, <4 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
void test_smfmac_f32_32x32x32_bf8_fp8(global v16f* out, v2i a, v4i b, v16f c, int idx)
{
  *out = __builtin_amdgcn_smfmac_f32_32x32x32_bf8_fp8(a, b, c, idx, 0, 0);
}
// __builtin_amdgcn_smfmac_f32_32x32x32_fp8_bf8 must be emitted as a call to the matching
// intrinsic with a, b, c and the runtime idx forwarded and the two trailing immediates both 0.
// CHECK-GFX940-LABEL: @test_smfmac_f32_32x32x32_fp8_bf8
// CHECK-GFX940: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.bf8(<2 x i32> %a, <4 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
void test_smfmac_f32_32x32x32_fp8_bf8(global v16f* out, v2i a, v4i b, v16f c, int idx)
{
  *out = __builtin_amdgcn_smfmac_f32_32x32x32_fp8_bf8(a, b, c, idx, 0, 0);
}
// __builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8 must be emitted as a call to the matching
// intrinsic with a, b, c and the runtime idx forwarded and the two trailing immediates both 0.
// CHECK-GFX940-LABEL: @test_smfmac_f32_32x32x32_fp8_fp8
// CHECK-GFX940: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.fp8(<2 x i32> %a, <4 x i32> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
void test_smfmac_f32_32x32x32_fp8_fp8(global v16f* out, v2i a, v4i b, v16f c, int idx)
{
  *out = __builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8(a, b, c, idx, 0, 0);
}
#endif // MFMA_GFX940_TESTS