1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
3 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s
4 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
5 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
; Intrinsics exercised by the tests in this file. In every call below the last
; operand is passed as a literal: an i32 in the range 0-3 for the cvt.f32.* and
; cvt.sr.* intrinsics, and an i1 for the cvt.pk.* intrinsics.
7 declare float @llvm.amdgcn.cvt.f32.bf8(i32, i32)
8 declare float @llvm.amdgcn.cvt.f32.fp8(i32, i32)
9 declare <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32, i1)
10 declare <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32, i1)
11 declare i32 @llvm.amdgcn.cvt.pk.bf8.f32(float, float, i32, i1)
12 declare i32 @llvm.amdgcn.cvt.pk.fp8.f32(float, float, i32, i1)
13 declare i32 @llvm.amdgcn.cvt.sr.bf8.f32(float, i32, i32, i32)
14 declare i32 @llvm.amdgcn.cvt.sr.fp8.f32(float, i32, i32, i32)
; Calls llvm.amdgcn.cvt.f32.bf8 with selector operand 0.
; gfx940 is expected to select the SDWA form with src0_sel:BYTE_0; gfx12 is
; expected to select the e32 encoding with no byte_sel modifier.
16 define float @test_cvt_f32_bf8_byte0(i32 %a) {
17 ; GFX940-LABEL: test_cvt_f32_bf8_byte0:
19 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20 ; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_0
21 ; GFX940-NEXT: s_setpc_b64 s[30:31]
23 ; GFX12-LABEL: test_cvt_f32_bf8_byte0:
25 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
26 ; GFX12-NEXT: s_wait_expcnt 0x0
27 ; GFX12-NEXT: s_wait_samplecnt 0x0
28 ; GFX12-NEXT: s_wait_bvhcnt 0x0
29 ; GFX12-NEXT: s_wait_kmcnt 0x0
30 ; GFX12-NEXT: v_cvt_f32_bf8_e32 v0, v0
31 ; GFX12-NEXT: s_setpc_b64 s[30:31]
32 %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
; Calls llvm.amdgcn.cvt.f32.bf8 with selector operand 1.
; gfx940 is expected to select the SDWA form with src0_sel:BYTE_1; gfx12 is
; expected to select the e64 encoding with byte_sel:1.
36 define float @test_cvt_f32_bf8_byte1(i32 %a) {
37 ; GFX940-LABEL: test_cvt_f32_bf8_byte1:
39 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40 ; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
41 ; GFX940-NEXT: s_setpc_b64 s[30:31]
43 ; GFX12-LABEL: test_cvt_f32_bf8_byte1:
45 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
46 ; GFX12-NEXT: s_wait_expcnt 0x0
47 ; GFX12-NEXT: s_wait_samplecnt 0x0
48 ; GFX12-NEXT: s_wait_bvhcnt 0x0
49 ; GFX12-NEXT: s_wait_kmcnt 0x0
50 ; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 byte_sel:1
51 ; GFX12-NEXT: s_setpc_b64 s[30:31]
52 %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 1)
; Calls llvm.amdgcn.cvt.f32.bf8 with selector operand 2.
; gfx940 is expected to select the SDWA form with src0_sel:BYTE_2; gfx12 is
; expected to select the e64 encoding with byte_sel:2.
56 define float @test_cvt_f32_bf8_byte2(i32 %a) {
57 ; GFX940-LABEL: test_cvt_f32_bf8_byte2:
59 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60 ; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_2
61 ; GFX940-NEXT: s_setpc_b64 s[30:31]
63 ; GFX12-LABEL: test_cvt_f32_bf8_byte2:
65 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
66 ; GFX12-NEXT: s_wait_expcnt 0x0
67 ; GFX12-NEXT: s_wait_samplecnt 0x0
68 ; GFX12-NEXT: s_wait_bvhcnt 0x0
69 ; GFX12-NEXT: s_wait_kmcnt 0x0
70 ; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 byte_sel:2
71 ; GFX12-NEXT: s_setpc_b64 s[30:31]
72 %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 2)
; Calls llvm.amdgcn.cvt.f32.bf8 with selector operand 3.
; gfx940 is expected to select the SDWA form with src0_sel:BYTE_3; gfx12 is
; expected to select the e64 encoding with byte_sel:3.
76 define float @test_cvt_f32_bf8_byte3(i32 %a) {
77 ; GFX940-LABEL: test_cvt_f32_bf8_byte3:
79 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80 ; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_3
81 ; GFX940-NEXT: s_setpc_b64 s[30:31]
83 ; GFX12-LABEL: test_cvt_f32_bf8_byte3:
85 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
86 ; GFX12-NEXT: s_wait_expcnt 0x0
87 ; GFX12-NEXT: s_wait_samplecnt 0x0
88 ; GFX12-NEXT: s_wait_bvhcnt 0x0
89 ; GFX12-NEXT: s_wait_kmcnt 0x0
90 ; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 byte_sel:3
91 ; GFX12-NEXT: s_setpc_b64 s[30:31]
92 %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 3)
; Calls llvm.amdgcn.cvt.f32.fp8 with selector operand 0.
; gfx940 is expected to select the SDWA form with src0_sel:BYTE_0; gfx12 is
; expected to select the e32 encoding with no byte_sel modifier.
96 define float @test_cvt_f32_fp8_byte0(i32 %a) {
97 ; GFX940-LABEL: test_cvt_f32_fp8_byte0:
99 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100 ; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_0
101 ; GFX940-NEXT: s_setpc_b64 s[30:31]
103 ; GFX12-LABEL: test_cvt_f32_fp8_byte0:
105 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
106 ; GFX12-NEXT: s_wait_expcnt 0x0
107 ; GFX12-NEXT: s_wait_samplecnt 0x0
108 ; GFX12-NEXT: s_wait_bvhcnt 0x0
109 ; GFX12-NEXT: s_wait_kmcnt 0x0
110 ; GFX12-NEXT: v_cvt_f32_fp8_e32 v0, v0
111 ; GFX12-NEXT: s_setpc_b64 s[30:31]
112 %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 0)
; Calls llvm.amdgcn.cvt.f32.fp8 with selector operand 1.
; gfx940 is expected to select the SDWA form with src0_sel:BYTE_1; gfx12 is
; expected to select the e64 encoding with byte_sel:1.
116 define float @test_cvt_f32_fp8_byte1(i32 %a) {
117 ; GFX940-LABEL: test_cvt_f32_fp8_byte1:
119 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120 ; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
121 ; GFX940-NEXT: s_setpc_b64 s[30:31]
123 ; GFX12-LABEL: test_cvt_f32_fp8_byte1:
125 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
126 ; GFX12-NEXT: s_wait_expcnt 0x0
127 ; GFX12-NEXT: s_wait_samplecnt 0x0
128 ; GFX12-NEXT: s_wait_bvhcnt 0x0
129 ; GFX12-NEXT: s_wait_kmcnt 0x0
130 ; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:1
131 ; GFX12-NEXT: s_setpc_b64 s[30:31]
132 %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
; Calls llvm.amdgcn.cvt.f32.fp8 with selector operand 2.
; gfx940 is expected to select the SDWA form with src0_sel:BYTE_2; gfx12 is
; expected to select the e64 encoding with byte_sel:2.
136 define float @test_cvt_f32_fp8_byte2(i32 %a) {
137 ; GFX940-LABEL: test_cvt_f32_fp8_byte2:
139 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140 ; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_2
141 ; GFX940-NEXT: s_setpc_b64 s[30:31]
143 ; GFX12-LABEL: test_cvt_f32_fp8_byte2:
145 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
146 ; GFX12-NEXT: s_wait_expcnt 0x0
147 ; GFX12-NEXT: s_wait_samplecnt 0x0
148 ; GFX12-NEXT: s_wait_bvhcnt 0x0
149 ; GFX12-NEXT: s_wait_kmcnt 0x0
150 ; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:2
151 ; GFX12-NEXT: s_setpc_b64 s[30:31]
152 %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 2)
; Calls llvm.amdgcn.cvt.f32.fp8 with selector operand 3.
; gfx940 is expected to select the SDWA form with src0_sel:BYTE_3; gfx12 is
; expected to select the e64 encoding with byte_sel:3.
156 define float @test_cvt_f32_fp8_byte3(i32 %a) {
157 ; GFX940-LABEL: test_cvt_f32_fp8_byte3:
159 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
160 ; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_3
161 ; GFX940-NEXT: s_setpc_b64 s[30:31]
163 ; GFX12-LABEL: test_cvt_f32_fp8_byte3:
165 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
166 ; GFX12-NEXT: s_wait_expcnt 0x0
167 ; GFX12-NEXT: s_wait_samplecnt 0x0
168 ; GFX12-NEXT: s_wait_bvhcnt 0x0
169 ; GFX12-NEXT: s_wait_kmcnt 0x0
170 ; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:3
171 ; GFX12-NEXT: s_setpc_b64 s[30:31]
172 %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 3)
; Calls llvm.amdgcn.cvt.pk.f32.bf8 with the i1 selector set to false.
; Both targets are expected to select the plain e32 encoding, writing the
; <2 x float> result to v[0:1] with no source-select modifier.
176 define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) {
177 ; GFX940-LABEL: test_cvt_pk_f32_bf8_word0:
179 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180 ; GFX940-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0
181 ; GFX940-NEXT: s_setpc_b64 s[30:31]
183 ; GFX12-LABEL: test_cvt_pk_f32_bf8_word0:
185 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
186 ; GFX12-NEXT: s_wait_expcnt 0x0
187 ; GFX12-NEXT: s_wait_samplecnt 0x0
188 ; GFX12-NEXT: s_wait_bvhcnt 0x0
189 ; GFX12-NEXT: s_wait_kmcnt 0x0
190 ; GFX12-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0
191 ; GFX12-NEXT: s_setpc_b64 s[30:31]
192 %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
; Calls llvm.amdgcn.cvt.pk.f32.bf8 with the i1 selector set to true.
; gfx940 is expected to select the SDWA form with src0_sel:WORD_1; gfx12 is
; expected to select the e64 encoding with op_sel:[1,0].
196 define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) {
197 ; GFX940-LABEL: test_cvt_pk_f32_bf8_word1:
199 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
200 ; GFX940-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
201 ; GFX940-NEXT: s_setpc_b64 s[30:31]
203 ; GFX12-LABEL: test_cvt_pk_f32_bf8_word1:
205 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
206 ; GFX12-NEXT: s_wait_expcnt 0x0
207 ; GFX12-NEXT: s_wait_samplecnt 0x0
208 ; GFX12-NEXT: s_wait_bvhcnt 0x0
209 ; GFX12-NEXT: s_wait_kmcnt 0x0
210 ; GFX12-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
211 ; GFX12-NEXT: s_setpc_b64 s[30:31]
212 %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 true)
; Calls llvm.amdgcn.cvt.pk.f32.fp8 with the i1 selector set to false.
; Both targets are expected to select the plain e32 encoding, writing the
; <2 x float> result to v[0:1] with no source-select modifier.
216 define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) {
217 ; GFX940-LABEL: test_cvt_pk_f32_fp8_word0:
219 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
220 ; GFX940-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
221 ; GFX940-NEXT: s_setpc_b64 s[30:31]
223 ; GFX12-LABEL: test_cvt_pk_f32_fp8_word0:
225 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
226 ; GFX12-NEXT: s_wait_expcnt 0x0
227 ; GFX12-NEXT: s_wait_samplecnt 0x0
228 ; GFX12-NEXT: s_wait_bvhcnt 0x0
229 ; GFX12-NEXT: s_wait_kmcnt 0x0
230 ; GFX12-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
231 ; GFX12-NEXT: s_setpc_b64 s[30:31]
232 %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 false)
; Calls llvm.amdgcn.cvt.pk.f32.fp8 with the i1 selector set to true.
; gfx940 is expected to select the SDWA form with src0_sel:WORD_1; gfx12 is
; expected to select the e64 encoding with op_sel:[1,0].
236 define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) {
237 ; GFX940-LABEL: test_cvt_pk_f32_fp8_word1:
239 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240 ; GFX940-NEXT: v_cvt_pk_f32_fp8_sdwa v[0:1], v0 src0_sel:WORD_1
241 ; GFX940-NEXT: s_setpc_b64 s[30:31]
243 ; GFX12-LABEL: test_cvt_pk_f32_fp8_word1:
245 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
246 ; GFX12-NEXT: s_wait_expcnt 0x0
247 ; GFX12-NEXT: s_wait_samplecnt 0x0
248 ; GFX12-NEXT: s_wait_bvhcnt 0x0
249 ; GFX12-NEXT: s_wait_kmcnt 0x0
250 ; GFX12-NEXT: v_cvt_pk_f32_fp8_e64 v[0:1], v0 op_sel:[1,0]
251 ; GFX12-NEXT: s_setpc_b64 s[30:31]
252 %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
; Calls llvm.amdgcn.cvt.pk.bf8.f32 on %x, %y and %old with the i1 selector set
; to false. Both targets are expected to emit v_cvt_pk_bf8_f32 into v2 with no
; op_sel modifier and then copy v2 to v0; gfx12 additionally expects an
; s_delay_alu between the two instructions.
; NOTE(review): v2 is presumably the incoming %old register - confirm.
256 define i32 @test_cvt_pk_bf8_f32_word0(float %x, float %y, i32 %old) {
257 ; GFX940-LABEL: test_cvt_pk_bf8_f32_word0:
259 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
260 ; GFX940-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1
261 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
262 ; GFX940-NEXT: s_setpc_b64 s[30:31]
264 ; GFX12-LABEL: test_cvt_pk_bf8_f32_word0:
266 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
267 ; GFX12-NEXT: s_wait_expcnt 0x0
268 ; GFX12-NEXT: s_wait_samplecnt 0x0
269 ; GFX12-NEXT: s_wait_bvhcnt 0x0
270 ; GFX12-NEXT: s_wait_kmcnt 0x0
271 ; GFX12-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1
272 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
273 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
274 ; GFX12-NEXT: s_setpc_b64 s[30:31]
275 %ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %x, float %y, i32 %old, i1 false)
; Calls llvm.amdgcn.cvt.pk.bf8.f32 on %x, %y and %old with the i1 selector set
; to true. Both targets are expected to emit v_cvt_pk_bf8_f32 into v2 with
; op_sel:[0,0,1] and then copy v2 to v0. gfx940 expects an s_nop 0 between the
; two instructions; gfx12 expects an s_delay_alu there instead.
279 define i32 @test_cvt_pk_bf8_f32_word1(float %x, float %y, i32 %old) {
280 ; GFX940-LABEL: test_cvt_pk_bf8_f32_word1:
282 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283 ; GFX940-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1]
284 ; GFX940-NEXT: s_nop 0
285 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
286 ; GFX940-NEXT: s_setpc_b64 s[30:31]
288 ; GFX12-LABEL: test_cvt_pk_bf8_f32_word1:
290 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
291 ; GFX12-NEXT: s_wait_expcnt 0x0
292 ; GFX12-NEXT: s_wait_samplecnt 0x0
293 ; GFX12-NEXT: s_wait_bvhcnt 0x0
294 ; GFX12-NEXT: s_wait_kmcnt 0x0
295 ; GFX12-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1]
296 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
297 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
298 ; GFX12-NEXT: s_setpc_b64 s[30:31]
299 %ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %x, float %y, i32 %old, i1 true)
; Calls llvm.amdgcn.cvt.pk.fp8.f32 on %x, %y and %old with the i1 selector set
; to false. Both targets are expected to emit v_cvt_pk_fp8_f32 into v2 with no
; op_sel modifier and then copy v2 to v0; gfx12 additionally expects an
; s_delay_alu between the two instructions.
; NOTE(review): v2 is presumably the incoming %old register - confirm.
303 define i32 @test_cvt_pk_fp8_f32_word0(float %x, float %y, i32 %old) {
304 ; GFX940-LABEL: test_cvt_pk_fp8_f32_word0:
306 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307 ; GFX940-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1
308 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
309 ; GFX940-NEXT: s_setpc_b64 s[30:31]
311 ; GFX12-LABEL: test_cvt_pk_fp8_f32_word0:
313 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
314 ; GFX12-NEXT: s_wait_expcnt 0x0
315 ; GFX12-NEXT: s_wait_samplecnt 0x0
316 ; GFX12-NEXT: s_wait_bvhcnt 0x0
317 ; GFX12-NEXT: s_wait_kmcnt 0x0
318 ; GFX12-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1
319 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
320 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
321 ; GFX12-NEXT: s_setpc_b64 s[30:31]
322 %ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %x, float %y, i32 %old, i1 false)
; Calls llvm.amdgcn.cvt.pk.fp8.f32 on %x, %y and %old with the i1 selector set
; to true. Both targets are expected to emit v_cvt_pk_fp8_f32 into v2 with
; op_sel:[0,0,1] and then copy v2 to v0. gfx940 expects an s_nop 0 between the
; two instructions; gfx12 expects an s_delay_alu there instead.
326 define i32 @test_cvt_pk_fp8_f32_word1(float %x, float %y, i32 %old) {
327 ; GFX940-LABEL: test_cvt_pk_fp8_f32_word1:
329 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330 ; GFX940-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
331 ; GFX940-NEXT: s_nop 0
332 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
333 ; GFX940-NEXT: s_setpc_b64 s[30:31]
335 ; GFX12-LABEL: test_cvt_pk_fp8_f32_word1:
337 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
338 ; GFX12-NEXT: s_wait_expcnt 0x0
339 ; GFX12-NEXT: s_wait_samplecnt 0x0
340 ; GFX12-NEXT: s_wait_bvhcnt 0x0
341 ; GFX12-NEXT: s_wait_kmcnt 0x0
342 ; GFX12-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
343 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
344 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
345 ; GFX12-NEXT: s_setpc_b64 s[30:31]
346 %ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %x, float %y, i32 %old, i1 true)
; Calls llvm.amdgcn.cvt.sr.bf8.f32 on %x, %r and %old with selector operand 0.
; Both targets are expected to emit v_cvt_sr_bf8_f32 into v2 with no op_sel or
; byte_sel modifier and then copy v2 to v0; gfx12 additionally expects an
; s_delay_alu between the two instructions.
350 define i32 @test_cvt_sr_bf8_f32_byte0(float %x, i32 %r, i32 %old) {
351 ; GFX940-LABEL: test_cvt_sr_bf8_f32_byte0:
353 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
354 ; GFX940-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1
355 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
356 ; GFX940-NEXT: s_setpc_b64 s[30:31]
358 ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte0:
360 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
361 ; GFX12-NEXT: s_wait_expcnt 0x0
362 ; GFX12-NEXT: s_wait_samplecnt 0x0
363 ; GFX12-NEXT: s_wait_bvhcnt 0x0
364 ; GFX12-NEXT: s_wait_kmcnt 0x0
365 ; GFX12-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1
366 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
367 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
368 ; GFX12-NEXT: s_setpc_b64 s[30:31]
369 %ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 0)
; Calls llvm.amdgcn.cvt.sr.bf8.f32 on %x, %r and %old with selector operand 1.
; gfx940 is expected to encode the selector as op_sel:[0,0,1,0]; gfx12 is
; expected to encode it as byte_sel:1. The gfx940 checks for this case contain
; no s_nop before the v_mov, unlike the byte2 and byte3 variants.
373 define i32 @test_cvt_sr_bf8_f32_byte1(float %x, i32 %r, i32 %old) {
374 ; GFX940-LABEL: test_cvt_sr_bf8_f32_byte1:
376 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377 ; GFX940-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,0]
378 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
379 ; GFX940-NEXT: s_setpc_b64 s[30:31]
381 ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte1:
383 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
384 ; GFX12-NEXT: s_wait_expcnt 0x0
385 ; GFX12-NEXT: s_wait_samplecnt 0x0
386 ; GFX12-NEXT: s_wait_bvhcnt 0x0
387 ; GFX12-NEXT: s_wait_kmcnt 0x0
388 ; GFX12-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 byte_sel:1
389 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
390 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
391 ; GFX12-NEXT: s_setpc_b64 s[30:31]
392 %ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 1)
; Calls llvm.amdgcn.cvt.sr.bf8.f32 on %x, %r and %old with selector operand 2.
; gfx940 is expected to encode the selector as op_sel:[0,0,0,1], followed by
; an s_nop 0 before the v_mov; gfx12 is expected to encode it as byte_sel:2.
396 define i32 @test_cvt_sr_bf8_f32_byte2(float %x, i32 %r, i32 %old) {
397 ; GFX940-LABEL: test_cvt_sr_bf8_f32_byte2:
399 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
400 ; GFX940-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,0,1]
401 ; GFX940-NEXT: s_nop 0
402 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
403 ; GFX940-NEXT: s_setpc_b64 s[30:31]
405 ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte2:
407 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
408 ; GFX12-NEXT: s_wait_expcnt 0x0
409 ; GFX12-NEXT: s_wait_samplecnt 0x0
410 ; GFX12-NEXT: s_wait_bvhcnt 0x0
411 ; GFX12-NEXT: s_wait_kmcnt 0x0
412 ; GFX12-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 byte_sel:2
413 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
414 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
415 ; GFX12-NEXT: s_setpc_b64 s[30:31]
416 %ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 2)
; Calls llvm.amdgcn.cvt.sr.bf8.f32 on %x, %r and %old with selector operand 3.
; gfx940 is expected to encode the selector as op_sel:[0,0,1,1], followed by
; an s_nop 0 before the v_mov; gfx12 is expected to encode it as byte_sel:3.
420 define i32 @test_cvt_sr_bf8_f32_byte3(float %x, i32 %r, i32 %old) {
421 ; GFX940-LABEL: test_cvt_sr_bf8_f32_byte3:
423 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
424 ; GFX940-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,1]
425 ; GFX940-NEXT: s_nop 0
426 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
427 ; GFX940-NEXT: s_setpc_b64 s[30:31]
429 ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte3:
431 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
432 ; GFX12-NEXT: s_wait_expcnt 0x0
433 ; GFX12-NEXT: s_wait_samplecnt 0x0
434 ; GFX12-NEXT: s_wait_bvhcnt 0x0
435 ; GFX12-NEXT: s_wait_kmcnt 0x0
436 ; GFX12-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 byte_sel:3
437 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
438 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
439 ; GFX12-NEXT: s_setpc_b64 s[30:31]
440 %ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 3)
; Calls llvm.amdgcn.cvt.sr.fp8.f32 on %x, %r and %old with selector operand 0.
; Both targets are expected to emit v_cvt_sr_fp8_f32 into v2 with no op_sel or
; byte_sel modifier and then copy v2 to v0; gfx12 additionally expects an
; s_delay_alu between the two instructions.
444 define i32 @test_cvt_sr_fp8_f32_byte0(float %x, i32 %r, i32 %old) {
445 ; GFX940-LABEL: test_cvt_sr_fp8_f32_byte0:
447 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
448 ; GFX940-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1
449 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
450 ; GFX940-NEXT: s_setpc_b64 s[30:31]
452 ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte0:
454 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
455 ; GFX12-NEXT: s_wait_expcnt 0x0
456 ; GFX12-NEXT: s_wait_samplecnt 0x0
457 ; GFX12-NEXT: s_wait_bvhcnt 0x0
458 ; GFX12-NEXT: s_wait_kmcnt 0x0
459 ; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1
460 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
461 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
462 ; GFX12-NEXT: s_setpc_b64 s[30:31]
463 %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 0)
; Calls llvm.amdgcn.cvt.sr.fp8.f32 on %x, %r and %old with selector operand 1.
; gfx940 is expected to encode the selector as op_sel:[0,0,1,0]; gfx12 is
; expected to encode it as byte_sel:1. The gfx940 checks for this case contain
; no s_nop before the v_mov, unlike the byte2 and byte3 variants.
467 define i32 @test_cvt_sr_fp8_f32_byte1(float %x, i32 %r, i32 %old) {
468 ; GFX940-LABEL: test_cvt_sr_fp8_f32_byte1:
470 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471 ; GFX940-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,0]
472 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
473 ; GFX940-NEXT: s_setpc_b64 s[30:31]
475 ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte1:
477 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
478 ; GFX12-NEXT: s_wait_expcnt 0x0
479 ; GFX12-NEXT: s_wait_samplecnt 0x0
480 ; GFX12-NEXT: s_wait_bvhcnt 0x0
481 ; GFX12-NEXT: s_wait_kmcnt 0x0
482 ; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 byte_sel:1
483 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
484 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
485 ; GFX12-NEXT: s_setpc_b64 s[30:31]
486 %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 1)
; Calls llvm.amdgcn.cvt.sr.fp8.f32 on %x, %r and %old with selector operand 2.
; gfx940 is expected to encode the selector as op_sel:[0,0,0,1], followed by
; an s_nop 0 before the v_mov; gfx12 is expected to encode it as byte_sel:2.
490 define i32 @test_cvt_sr_fp8_f32_byte2(float %x, i32 %r, i32 %old) {
491 ; GFX940-LABEL: test_cvt_sr_fp8_f32_byte2:
493 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
494 ; GFX940-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,0,1]
495 ; GFX940-NEXT: s_nop 0
496 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
497 ; GFX940-NEXT: s_setpc_b64 s[30:31]
499 ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte2:
501 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
502 ; GFX12-NEXT: s_wait_expcnt 0x0
503 ; GFX12-NEXT: s_wait_samplecnt 0x0
504 ; GFX12-NEXT: s_wait_bvhcnt 0x0
505 ; GFX12-NEXT: s_wait_kmcnt 0x0
506 ; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 byte_sel:2
507 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
508 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
509 ; GFX12-NEXT: s_setpc_b64 s[30:31]
510 %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 2)
; Calls llvm.amdgcn.cvt.sr.fp8.f32 on %x, %r and %old with selector operand 3.
; gfx940 is expected to encode the selector as op_sel:[0,0,1,1], followed by
; an s_nop 0 before the v_mov; gfx12 is expected to encode it as byte_sel:3.
514 define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) {
515 ; GFX940-LABEL: test_cvt_sr_fp8_f32_byte3:
517 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
518 ; GFX940-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,1]
519 ; GFX940-NEXT: s_nop 0
520 ; GFX940-NEXT: v_mov_b32_e32 v0, v2
521 ; GFX940-NEXT: s_setpc_b64 s[30:31]
523 ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte3:
525 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
526 ; GFX12-NEXT: s_wait_expcnt 0x0
527 ; GFX12-NEXT: s_wait_samplecnt 0x0
528 ; GFX12-NEXT: s_wait_bvhcnt 0x0
529 ; GFX12-NEXT: s_wait_kmcnt 0x0
530 ; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 byte_sel:3
531 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
532 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
533 ; GFX12-NEXT: s_setpc_b64 s[30:31]
534 %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 3)
; Sign-extends the i16 argument to i32 and passes it to
; llvm.amdgcn.cvt.f32.fp8 with selector operand 1. Both targets are expected
; to keep the extension as v_bfe_i32 v0, v0, 0, 16 ahead of the conversion,
; which reads BYTE_1 (gfx940 SDWA) or uses byte_sel:1 (gfx12 e64).
; NOTE(review): presumably guards against the sext being dropped or folded
; into the source select - confirm against the originating change.
538 define float @test_sext_cvt_f32_fp8(i16 %a) {
539 ; GFX940-LABEL: test_sext_cvt_f32_fp8:
541 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
542 ; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
543 ; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
544 ; GFX940-NEXT: s_setpc_b64 s[30:31]
546 ; GFX12-LABEL: test_sext_cvt_f32_fp8:
548 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
549 ; GFX12-NEXT: s_wait_expcnt 0x0
550 ; GFX12-NEXT: s_wait_samplecnt 0x0
551 ; GFX12-NEXT: s_wait_bvhcnt 0x0
552 ; GFX12-NEXT: s_wait_kmcnt 0x0
553 ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
554 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
555 ; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:1
556 ; GFX12-NEXT: s_setpc_b64 s[30:31]
557 %a.sext = sext i16 %a to i32
558 %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a.sext, i32 1)
; Sign-extends the i16 argument to i32 and passes it to
; llvm.amdgcn.cvt.f32.bf8 with selector operand 1. Both targets are expected
; to keep the extension as v_bfe_i32 v0, v0, 0, 16 ahead of the conversion,
; which reads BYTE_1 (gfx940 SDWA) or uses byte_sel:1 (gfx12 e64).
; NOTE(review): presumably guards against the sext being dropped or folded
; into the source select - confirm against the originating change.
562 define float @test_sext_cvt_f32_bf8(i16 %a) {
563 ; GFX940-LABEL: test_sext_cvt_f32_bf8:
565 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566 ; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
567 ; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
568 ; GFX940-NEXT: s_setpc_b64 s[30:31]
570 ; GFX12-LABEL: test_sext_cvt_f32_bf8:
572 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
573 ; GFX12-NEXT: s_wait_expcnt 0x0
574 ; GFX12-NEXT: s_wait_samplecnt 0x0
575 ; GFX12-NEXT: s_wait_bvhcnt 0x0
576 ; GFX12-NEXT: s_wait_kmcnt 0x0
577 ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
578 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
579 ; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 byte_sel:1
580 ; GFX12-NEXT: s_setpc_b64 s[30:31]
581 %a.sext = sext i16 %a to i32
582 %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a.sext, i32 1)
; Sign-extends the i16 argument to i32 and passes it to
; llvm.amdgcn.cvt.pk.f32.bf8 with the i1 selector set to true. Both targets
; are expected to keep the extension as v_bfe_i32 v0, v0, 0, 16 ahead of the
; conversion, which reads WORD_1 (gfx940 SDWA) or uses op_sel:[1,0] (gfx12).
; NOTE(review): presumably guards against the sext being dropped, since the
; selected word lies in the extended bits - confirm.
586 define <2 x float> @test_sext_cvt_pk_f32_bf8_word1(i16 %a) {
587 ; GFX940-LABEL: test_sext_cvt_pk_f32_bf8_word1:
589 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
590 ; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
591 ; GFX940-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
592 ; GFX940-NEXT: s_setpc_b64 s[30:31]
594 ; GFX12-LABEL: test_sext_cvt_pk_f32_bf8_word1:
596 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
597 ; GFX12-NEXT: s_wait_expcnt 0x0
598 ; GFX12-NEXT: s_wait_samplecnt 0x0
599 ; GFX12-NEXT: s_wait_bvhcnt 0x0
600 ; GFX12-NEXT: s_wait_kmcnt 0x0
601 ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
602 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
603 ; GFX12-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
604 ; GFX12-NEXT: s_setpc_b64 s[30:31]
605 %a.sext = sext i16 %a to i32
606 %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a.sext, i1 true)
; Sign-extends the i16 argument to i32 and passes it to
; llvm.amdgcn.cvt.pk.f32.fp8 with the i1 selector set to false. Both targets
; are expected to emit v_bfe_i32 v0, v0, 0, 16 followed by the plain e32
; conversion into v[0:1]; gfx12 additionally expects an s_delay_alu between
; the two instructions.
610 define <2 x float> @test_sext_cvt_pk_f32_fp8_word0(i16 %a) {
611 ; GFX940-LABEL: test_sext_cvt_pk_f32_fp8_word0:
613 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
614 ; GFX940-NEXT: v_bfe_i32 v0, v0, 0, 16
615 ; GFX940-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
616 ; GFX940-NEXT: s_setpc_b64 s[30:31]
618 ; GFX12-LABEL: test_sext_cvt_pk_f32_fp8_word0:
620 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
621 ; GFX12-NEXT: s_wait_expcnt 0x0
622 ; GFX12-NEXT: s_wait_samplecnt 0x0
623 ; GFX12-NEXT: s_wait_bvhcnt 0x0
624 ; GFX12-NEXT: s_wait_kmcnt 0x0
625 ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
626 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
627 ; GFX12-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
628 ; GFX12-NEXT: s_setpc_b64 s[30:31]
629 %a.sext = sext i16 %a to i32
630 %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a.sext, i1 false)