1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
; Baseline case: %a, %b and %c are passed unmodified to
; llvm.amdgcn.dot4.f32.fp8.bf8. The checks expect a single v_dot4_f32_fp8_bf8
; with no neg_lo/neg_hi modifiers.
5 define float @test_amdgcn_dot4_f32_fp8_bf8(i32 %a, i32 %b, float %c) {
6 ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8:
7 ; GFX12: ; %bb.0: ; %entry
8 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
9 ; GFX12-NEXT: s_wait_expcnt 0x0
10 ; GFX12-NEXT: s_wait_samplecnt 0x0
11 ; GFX12-NEXT: s_wait_bvhcnt 0x0
12 ; GFX12-NEXT: s_wait_kmcnt 0x0
13 ; GFX12-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2
14 ; GFX12-NEXT: s_setpc_b64 s[30:31]
16 %ret = call float @llvm.amdgcn.dot4.f32.fp8.bf8(i32 %a, i32 %b, float %c)
; fabs on the accumulator: %c goes through llvm.fabs.f32 before the
; llvm.amdgcn.dot4.f32.fp8.bf8 call. The checks expect no separate instruction
; for the fabs, only neg_hi:[0,0,1] on v_dot4_f32_fp8_bf8.
20 define float @test_amdgcn_dot4_f32_fp8_bf8_fabs(i32 %a, i32 %b, float %c) {
21 ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fabs:
22 ; GFX12: ; %bb.0: ; %entry
23 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
24 ; GFX12-NEXT: s_wait_expcnt 0x0
25 ; GFX12-NEXT: s_wait_samplecnt 0x0
26 ; GFX12-NEXT: s_wait_bvhcnt 0x0
27 ; GFX12-NEXT: s_wait_kmcnt 0x0
28 ; GFX12-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1]
29 ; GFX12-NEXT: s_setpc_b64 s[30:31]
31 %fabs.c = call float @llvm.fabs.f32(float %c)
32 %ret = call float @llvm.amdgcn.dot4.f32.fp8.bf8(i32 %a, i32 %b, float %fabs.c)
; fneg on the accumulator: %c is negated before the
; llvm.amdgcn.dot4.f32.fp8.bf8 call. The checks expect no separate instruction
; for the fneg, only neg_lo:[0,0,1] on v_dot4_f32_fp8_bf8.
36 define float @test_amdgcn_dot4_f32_fp8_bf8_fneg(i32 %a, i32 %b, float %c) {
37 ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fneg:
38 ; GFX12: ; %bb.0: ; %entry
39 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
40 ; GFX12-NEXT: s_wait_expcnt 0x0
41 ; GFX12-NEXT: s_wait_samplecnt 0x0
42 ; GFX12-NEXT: s_wait_bvhcnt 0x0
43 ; GFX12-NEXT: s_wait_kmcnt 0x0
44 ; GFX12-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1]
45 ; GFX12-NEXT: s_setpc_b64 s[30:31]
47 %fneg.c = fneg float %c
48 %ret = call float @llvm.amdgcn.dot4.f32.fp8.bf8(i32 %a, i32 %b, float %fneg.c)
; fabs applied to fneg: the accumulator is fabs(fneg(%c)). The checks expect
; only neg_hi:[0,0,1] on v_dot4_f32_fp8_bf8, i.e. the inner fneg leaves no
; neg_lo modifier in the expected output.
52 define float @test_amdgcn_dot4_f32_fp8_bf8_fabs_fneg(i32 %a, i32 %b, float %c) {
53 ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fabs_fneg:
54 ; GFX12: ; %bb.0: ; %entry
55 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
56 ; GFX12-NEXT: s_wait_expcnt 0x0
57 ; GFX12-NEXT: s_wait_samplecnt 0x0
58 ; GFX12-NEXT: s_wait_bvhcnt 0x0
59 ; GFX12-NEXT: s_wait_kmcnt 0x0
60 ; GFX12-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1]
61 ; GFX12-NEXT: s_setpc_b64 s[30:31]
63 %fneg.c = fneg float %c
64 %fabs.fneg.c = call float @llvm.fabs.f32(float %fneg.c)
65 %ret = call float @llvm.amdgcn.dot4.f32.fp8.bf8(i32 %a, i32 %b, float %fabs.fneg.c)
; fneg applied to fabs: the accumulator is fneg(fabs(%c)). The checks expect
; both neg_lo:[0,0,1] and neg_hi:[0,0,1] on v_dot4_f32_fp8_bf8 and no separate
; instruction for either operation.
69 define float @test_amdgcn_dot4_f32_fp8_bf8_fneg_fabs(i32 %a, i32 %b, float %c) {
70 ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_bf8_fneg_fabs:
71 ; GFX12: ; %bb.0: ; %entry
72 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
73 ; GFX12-NEXT: s_wait_expcnt 0x0
74 ; GFX12-NEXT: s_wait_samplecnt 0x0
75 ; GFX12-NEXT: s_wait_bvhcnt 0x0
76 ; GFX12-NEXT: s_wait_kmcnt 0x0
77 ; GFX12-NEXT: v_dot4_f32_fp8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
78 ; GFX12-NEXT: s_setpc_b64 s[30:31]
80 %fabs.c = call float @llvm.fabs.f32(float %c)
81 %fneg.fabs.c = fneg float %fabs.c
82 %ret = call float @llvm.amdgcn.dot4.f32.fp8.bf8(i32 %a, i32 %b, float %fneg.fabs.c)
; Baseline case: %a, %b and %c are passed unmodified to
; llvm.amdgcn.dot4.f32.bf8.fp8. The checks expect a single v_dot4_f32_bf8_fp8
; with no neg_lo/neg_hi modifiers.
86 define float @test_amdgcn_dot4_f32_bf8_fp8(i32 %a, i32 %b, float %c) {
87 ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8:
88 ; GFX12: ; %bb.0: ; %entry
89 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
90 ; GFX12-NEXT: s_wait_expcnt 0x0
91 ; GFX12-NEXT: s_wait_samplecnt 0x0
92 ; GFX12-NEXT: s_wait_bvhcnt 0x0
93 ; GFX12-NEXT: s_wait_kmcnt 0x0
94 ; GFX12-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2
95 ; GFX12-NEXT: s_setpc_b64 s[30:31]
97 %ret = call float @llvm.amdgcn.dot4.f32.bf8.fp8(i32 %a, i32 %b, float %c)
; fabs on the accumulator: %c goes through llvm.fabs.f32 before the
; llvm.amdgcn.dot4.f32.bf8.fp8 call. The checks expect no separate instruction
; for the fabs, only neg_hi:[0,0,1] on v_dot4_f32_bf8_fp8.
101 define float @test_amdgcn_dot4_f32_bf8_fp8_fabs(i32 %a, i32 %b, float %c) {
102 ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fabs:
103 ; GFX12: ; %bb.0: ; %entry
104 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
105 ; GFX12-NEXT: s_wait_expcnt 0x0
106 ; GFX12-NEXT: s_wait_samplecnt 0x0
107 ; GFX12-NEXT: s_wait_bvhcnt 0x0
108 ; GFX12-NEXT: s_wait_kmcnt 0x0
109 ; GFX12-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1]
110 ; GFX12-NEXT: s_setpc_b64 s[30:31]
112 %fabs.c = call float @llvm.fabs.f32(float %c)
113 %ret = call float @llvm.amdgcn.dot4.f32.bf8.fp8(i32 %a, i32 %b, float %fabs.c)
; fneg on the accumulator: %c is negated before the
; llvm.amdgcn.dot4.f32.bf8.fp8 call. The checks expect no separate instruction
; for the fneg, only neg_lo:[0,0,1] on v_dot4_f32_bf8_fp8.
117 define float @test_amdgcn_dot4_f32_bf8_fp8_fneg(i32 %a, i32 %b, float %c) {
118 ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fneg:
119 ; GFX12: ; %bb.0: ; %entry
120 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
121 ; GFX12-NEXT: s_wait_expcnt 0x0
122 ; GFX12-NEXT: s_wait_samplecnt 0x0
123 ; GFX12-NEXT: s_wait_bvhcnt 0x0
124 ; GFX12-NEXT: s_wait_kmcnt 0x0
125 ; GFX12-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1]
126 ; GFX12-NEXT: s_setpc_b64 s[30:31]
128 %fneg.c = fneg float %c
129 %ret = call float @llvm.amdgcn.dot4.f32.bf8.fp8(i32 %a, i32 %b, float %fneg.c)
; fabs applied to fneg: the accumulator is fabs(fneg(%c)). The checks expect
; only neg_hi:[0,0,1] on v_dot4_f32_bf8_fp8, i.e. the inner fneg leaves no
; neg_lo modifier in the expected output.
133 define float @test_amdgcn_dot4_f32_bf8_fp8_fabs_fneg(i32 %a, i32 %b, float %c) {
134 ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fabs_fneg:
135 ; GFX12: ; %bb.0: ; %entry
136 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
137 ; GFX12-NEXT: s_wait_expcnt 0x0
138 ; GFX12-NEXT: s_wait_samplecnt 0x0
139 ; GFX12-NEXT: s_wait_bvhcnt 0x0
140 ; GFX12-NEXT: s_wait_kmcnt 0x0
141 ; GFX12-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1]
142 ; GFX12-NEXT: s_setpc_b64 s[30:31]
144 %fneg.c = fneg float %c
145 %fabs.fneg.c = call float @llvm.fabs.f32(float %fneg.c)
146 %ret = call float @llvm.amdgcn.dot4.f32.bf8.fp8(i32 %a, i32 %b, float %fabs.fneg.c)
; fneg applied to fabs: the accumulator is fneg(fabs(%c)). The checks expect
; both neg_lo:[0,0,1] and neg_hi:[0,0,1] on v_dot4_f32_bf8_fp8 and no separate
; instruction for either operation.
150 define float @test_amdgcn_dot4_f32_bf8_fp8_fneg_fabs(i32 %a, i32 %b, float %c) {
151 ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_fp8_fneg_fabs:
152 ; GFX12: ; %bb.0: ; %entry
153 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
154 ; GFX12-NEXT: s_wait_expcnt 0x0
155 ; GFX12-NEXT: s_wait_samplecnt 0x0
156 ; GFX12-NEXT: s_wait_bvhcnt 0x0
157 ; GFX12-NEXT: s_wait_kmcnt 0x0
158 ; GFX12-NEXT: v_dot4_f32_bf8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
159 ; GFX12-NEXT: s_setpc_b64 s[30:31]
161 %fabs.c = call float @llvm.fabs.f32(float %c)
162 %fneg.fabs.c = fneg float %fabs.c
163 %ret = call float @llvm.amdgcn.dot4.f32.bf8.fp8(i32 %a, i32 %b, float %fneg.fabs.c)
; Baseline case: %a, %b and %c are passed unmodified to
; llvm.amdgcn.dot4.f32.fp8.fp8. The checks expect a single v_dot4_f32_fp8_fp8
; with no neg_lo/neg_hi modifiers.
167 define float @test_amdgcn_dot4_f32_fp8_fp8(i32 %a, i32 %b, float %c) {
168 ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8:
169 ; GFX12: ; %bb.0: ; %entry
170 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
171 ; GFX12-NEXT: s_wait_expcnt 0x0
172 ; GFX12-NEXT: s_wait_samplecnt 0x0
173 ; GFX12-NEXT: s_wait_bvhcnt 0x0
174 ; GFX12-NEXT: s_wait_kmcnt 0x0
175 ; GFX12-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2
176 ; GFX12-NEXT: s_setpc_b64 s[30:31]
178 %ret = call float @llvm.amdgcn.dot4.f32.fp8.fp8(i32 %a, i32 %b, float %c)
; fabs on the accumulator: %c goes through llvm.fabs.f32 before the
; llvm.amdgcn.dot4.f32.fp8.fp8 call. The checks expect no separate instruction
; for the fabs, only neg_hi:[0,0,1] on v_dot4_f32_fp8_fp8.
182 define float @test_amdgcn_dot4_f32_fp8_fp8_fabs(i32 %a, i32 %b, float %c) {
183 ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fabs:
184 ; GFX12: ; %bb.0: ; %entry
185 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
186 ; GFX12-NEXT: s_wait_expcnt 0x0
187 ; GFX12-NEXT: s_wait_samplecnt 0x0
188 ; GFX12-NEXT: s_wait_bvhcnt 0x0
189 ; GFX12-NEXT: s_wait_kmcnt 0x0
190 ; GFX12-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1]
191 ; GFX12-NEXT: s_setpc_b64 s[30:31]
193 %fabs.c = call float @llvm.fabs.f32(float %c)
194 %ret = call float @llvm.amdgcn.dot4.f32.fp8.fp8(i32 %a, i32 %b, float %fabs.c)
; fneg on the accumulator: %c is negated before the
; llvm.amdgcn.dot4.f32.fp8.fp8 call. The checks expect no separate instruction
; for the fneg, only neg_lo:[0,0,1] on v_dot4_f32_fp8_fp8.
198 define float @test_amdgcn_dot4_f32_fp8_fp8_fneg(i32 %a, i32 %b, float %c) {
199 ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fneg:
200 ; GFX12: ; %bb.0: ; %entry
201 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
202 ; GFX12-NEXT: s_wait_expcnt 0x0
203 ; GFX12-NEXT: s_wait_samplecnt 0x0
204 ; GFX12-NEXT: s_wait_bvhcnt 0x0
205 ; GFX12-NEXT: s_wait_kmcnt 0x0
206 ; GFX12-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1]
207 ; GFX12-NEXT: s_setpc_b64 s[30:31]
209 %fneg.c = fneg float %c
210 %ret = call float @llvm.amdgcn.dot4.f32.fp8.fp8(i32 %a, i32 %b, float %fneg.c)
; fabs applied to fneg: the accumulator is fabs(fneg(%c)). The checks expect
; only neg_hi:[0,0,1] on v_dot4_f32_fp8_fp8, i.e. the inner fneg leaves no
; neg_lo modifier in the expected output.
214 define float @test_amdgcn_dot4_f32_fp8_fp8_fabs_fneg(i32 %a, i32 %b, float %c) {
215 ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fabs_fneg:
216 ; GFX12: ; %bb.0: ; %entry
217 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
218 ; GFX12-NEXT: s_wait_expcnt 0x0
219 ; GFX12-NEXT: s_wait_samplecnt 0x0
220 ; GFX12-NEXT: s_wait_bvhcnt 0x0
221 ; GFX12-NEXT: s_wait_kmcnt 0x0
222 ; GFX12-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_hi:[0,0,1]
223 ; GFX12-NEXT: s_setpc_b64 s[30:31]
225 %fneg.c = fneg float %c
226 %fabs.fneg.c = call float @llvm.fabs.f32(float %fneg.c)
227 %ret = call float @llvm.amdgcn.dot4.f32.fp8.fp8(i32 %a, i32 %b, float %fabs.fneg.c)
; fneg applied to fabs: the accumulator is fneg(fabs(%c)). The checks expect
; both neg_lo:[0,0,1] and neg_hi:[0,0,1] on v_dot4_f32_fp8_fp8 and no separate
; instruction for either operation.
231 define float @test_amdgcn_dot4_f32_fp8_fp8_fneg_fabs(i32 %a, i32 %b, float %c) {
232 ; GFX12-LABEL: test_amdgcn_dot4_f32_fp8_fp8_fneg_fabs:
233 ; GFX12: ; %bb.0: ; %entry
234 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
235 ; GFX12-NEXT: s_wait_expcnt 0x0
236 ; GFX12-NEXT: s_wait_samplecnt 0x0
237 ; GFX12-NEXT: s_wait_bvhcnt 0x0
238 ; GFX12-NEXT: s_wait_kmcnt 0x0
239 ; GFX12-NEXT: v_dot4_f32_fp8_fp8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
240 ; GFX12-NEXT: s_setpc_b64 s[30:31]
242 %fabs.c = call float @llvm.fabs.f32(float %c)
243 %fneg.fabs.c = fneg float %fabs.c
244 %ret = call float @llvm.amdgcn.dot4.f32.fp8.fp8(i32 %a, i32 %b, float %fneg.fabs.c)
; Baseline case: %a, %b and %c are passed unmodified to
; llvm.amdgcn.dot4.f32.bf8.bf8. The checks expect a single v_dot4_f32_bf8_bf8
; with no neg_lo/neg_hi modifiers.
248 define float @test_amdgcn_dot4_f32_bf8_bf8(i32 %a, i32 %b, float %c) {
249 ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8:
250 ; GFX12: ; %bb.0: ; %entry
251 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
252 ; GFX12-NEXT: s_wait_expcnt 0x0
253 ; GFX12-NEXT: s_wait_samplecnt 0x0
254 ; GFX12-NEXT: s_wait_bvhcnt 0x0
255 ; GFX12-NEXT: s_wait_kmcnt 0x0
256 ; GFX12-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2
257 ; GFX12-NEXT: s_setpc_b64 s[30:31]
259 %ret = call float @llvm.amdgcn.dot4.f32.bf8.bf8(i32 %a, i32 %b, float %c)
; fabs on the accumulator: %c goes through llvm.fabs.f32 before the
; llvm.amdgcn.dot4.f32.bf8.bf8 call. The checks expect no separate instruction
; for the fabs, only neg_hi:[0,0,1] on v_dot4_f32_bf8_bf8.
263 define float @test_amdgcn_dot4_f32_bf8_bf8_fabs(i32 %a, i32 %b, float %c) {
264 ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fabs:
265 ; GFX12: ; %bb.0: ; %entry
266 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
267 ; GFX12-NEXT: s_wait_expcnt 0x0
268 ; GFX12-NEXT: s_wait_samplecnt 0x0
269 ; GFX12-NEXT: s_wait_bvhcnt 0x0
270 ; GFX12-NEXT: s_wait_kmcnt 0x0
271 ; GFX12-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1]
272 ; GFX12-NEXT: s_setpc_b64 s[30:31]
274 %fabs.c = call float @llvm.fabs.f32(float %c)
275 %ret = call float @llvm.amdgcn.dot4.f32.bf8.bf8(i32 %a, i32 %b, float %fabs.c)
; fneg on the accumulator: %c is negated before the
; llvm.amdgcn.dot4.f32.bf8.bf8 call. The checks expect no separate instruction
; for the fneg, only neg_lo:[0,0,1] on v_dot4_f32_bf8_bf8.
279 define float @test_amdgcn_dot4_f32_bf8_bf8_fneg(i32 %a, i32 %b, float %c) {
280 ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fneg:
281 ; GFX12: ; %bb.0: ; %entry
282 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
283 ; GFX12-NEXT: s_wait_expcnt 0x0
284 ; GFX12-NEXT: s_wait_samplecnt 0x0
285 ; GFX12-NEXT: s_wait_bvhcnt 0x0
286 ; GFX12-NEXT: s_wait_kmcnt 0x0
287 ; GFX12-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1]
288 ; GFX12-NEXT: s_setpc_b64 s[30:31]
290 %fneg.c = fneg float %c
291 %ret = call float @llvm.amdgcn.dot4.f32.bf8.bf8(i32 %a, i32 %b, float %fneg.c)
; fabs applied to fneg: the accumulator is fabs(fneg(%c)). The checks expect
; only neg_hi:[0,0,1] on v_dot4_f32_bf8_bf8, i.e. the inner fneg leaves no
; neg_lo modifier in the expected output.
295 define float @test_amdgcn_dot4_f32_bf8_bf8_fabs_fneg(i32 %a, i32 %b, float %c) {
296 ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fabs_fneg:
297 ; GFX12: ; %bb.0: ; %entry
298 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
299 ; GFX12-NEXT: s_wait_expcnt 0x0
300 ; GFX12-NEXT: s_wait_samplecnt 0x0
301 ; GFX12-NEXT: s_wait_bvhcnt 0x0
302 ; GFX12-NEXT: s_wait_kmcnt 0x0
303 ; GFX12-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_hi:[0,0,1]
304 ; GFX12-NEXT: s_setpc_b64 s[30:31]
306 %fneg.c = fneg float %c
307 %fabs.fneg.c = call float @llvm.fabs.f32(float %fneg.c)
308 %ret = call float @llvm.amdgcn.dot4.f32.bf8.bf8(i32 %a, i32 %b, float %fabs.fneg.c)
; fneg applied to fabs: the accumulator is fneg(fabs(%c)). The checks expect
; both neg_lo:[0,0,1] and neg_hi:[0,0,1] on v_dot4_f32_bf8_bf8 and no separate
; instruction for either operation.
312 define float @test_amdgcn_dot4_f32_bf8_bf8_fneg_fabs(i32 %a, i32 %b, float %c) {
313 ; GFX12-LABEL: test_amdgcn_dot4_f32_bf8_bf8_fneg_fabs:
314 ; GFX12: ; %bb.0: ; %entry
315 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
316 ; GFX12-NEXT: s_wait_expcnt 0x0
317 ; GFX12-NEXT: s_wait_samplecnt 0x0
318 ; GFX12-NEXT: s_wait_bvhcnt 0x0
319 ; GFX12-NEXT: s_wait_kmcnt 0x0
320 ; GFX12-NEXT: v_dot4_f32_bf8_bf8 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1]
321 ; GFX12-NEXT: s_setpc_b64 s[30:31]
323 %fabs.c = call float @llvm.fabs.f32(float %c)
324 %fneg.fabs.c = fneg float %fabs.c
325 %ret = call float @llvm.amdgcn.dot4.f32.bf8.bf8(i32 %a, i32 %b, float %fneg.fabs.c)
; Declarations of the intrinsics called by the test functions in this file:
; the four dot4 variants (fp8/bf8 operand combinations), each taking
; (i32, i32, float) and returning float, plus llvm.fabs.f32.
329 declare float @llvm.amdgcn.dot4.f32.fp8.bf8(i32 %a, i32 %b, float %c)
330 declare float @llvm.amdgcn.dot4.f32.bf8.fp8(i32 %a, i32 %b, float %c)
331 declare float @llvm.amdgcn.dot4.f32.fp8.fp8(i32 %a, i32 %b, float %c)
332 declare float @llvm.amdgcn.dot4.f32.bf8.bf8(i32 %a, i32 %b, float %c)
334 declare float @llvm.fabs.f32(float %a)