1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX940 %s
3 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX940 %s
4 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX950 %s
5 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9X,GFX950 %s
6 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
7 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
; Declarations of the AMDGPU fp8/bf8 conversion intrinsics exercised in this file.
;  - cvt.f32.{bf8,fp8}     take (i32, i32); the tests pass 0..3 as the second operand.
;  - cvt.pk.f32.{bf8,fp8}  take (i32, i1); the tests pass false/true as the second operand.
;  - cvt.pk.{bf8,fp8}.f32  take (float, float, i32, i1); the tests pass %old as the i32.
;  - cvt.sr.{bf8,fp8}.f32  take (float, i32, i32, i32); the tests pass 0..3 as the last operand.
9 declare float @llvm.amdgcn.cvt.f32.bf8(i32, i32)
10 declare float @llvm.amdgcn.cvt.f32.fp8(i32, i32)
11 declare <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32, i1)
12 declare <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32, i1)
13 declare i32 @llvm.amdgcn.cvt.pk.bf8.f32(float, float, i32, i1)
14 declare i32 @llvm.amdgcn.cvt.pk.fp8.f32(float, float, i32, i1)
15 declare i32 @llvm.amdgcn.cvt.sr.bf8.f32(float, i32, i32, i32)
16 declare i32 @llvm.amdgcn.cvt.sr.fp8.f32(float, i32, i32, i32)
; Calls llvm.amdgcn.cvt.f32.bf8 on %a with selector operand 0.
; Expected output differs per target: gfx940 uses the SDWA form with
; src0_sel BYTE_0, while gfx950 and gfx1200 use the plain e32 form with no
; selector modifier. This is why gfx940 and gfx950 have separate check
; prefixes here instead of the shared gfx9 one.
18 define float @test_cvt_f32_bf8_byte0(i32 %a) {
19 ; GFX940-LABEL: test_cvt_f32_bf8_byte0:
21 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX940-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_0
23 ; GFX940-NEXT: s_setpc_b64 s[30:31]
25 ; GFX950-LABEL: test_cvt_f32_bf8_byte0:
27 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28 ; GFX950-NEXT: v_cvt_f32_bf8_e32 v0, v0
29 ; GFX950-NEXT: s_setpc_b64 s[30:31]
31 ; GFX12-LABEL: test_cvt_f32_bf8_byte0:
33 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
34 ; GFX12-NEXT: s_wait_expcnt 0x0
35 ; GFX12-NEXT: s_wait_samplecnt 0x0
36 ; GFX12-NEXT: s_wait_bvhcnt 0x0
37 ; GFX12-NEXT: s_wait_kmcnt 0x0
38 ; GFX12-NEXT: v_cvt_f32_bf8_e32 v0, v0
39 ; GFX12-NEXT: s_setpc_b64 s[30:31]
40 %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
; Calls llvm.amdgcn.cvt.f32.bf8 on %a with selector operand 1.
; Expected: both gfx9 targets emit the SDWA form with src0_sel BYTE_1;
; gfx1200 emits the e64 form with byte_sel 1.
45 define float @test_cvt_f32_bf8_byte1(i32 %a) {
46 ; GFX9X-LABEL: test_cvt_f32_bf8_byte1:
48 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49 ; GFX9X-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
50 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
52 ; GFX12-LABEL: test_cvt_f32_bf8_byte1:
54 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
55 ; GFX12-NEXT: s_wait_expcnt 0x0
56 ; GFX12-NEXT: s_wait_samplecnt 0x0
57 ; GFX12-NEXT: s_wait_bvhcnt 0x0
58 ; GFX12-NEXT: s_wait_kmcnt 0x0
59 ; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 byte_sel:1
60 ; GFX12-NEXT: s_setpc_b64 s[30:31]
61 %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 1)
; Calls llvm.amdgcn.cvt.f32.bf8 on %a with selector operand 2.
; Expected: both gfx9 targets emit the SDWA form with src0_sel BYTE_2;
; gfx1200 emits the e64 form with byte_sel 2.
65 define float @test_cvt_f32_bf8_byte2(i32 %a) {
66 ; GFX9X-LABEL: test_cvt_f32_bf8_byte2:
68 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69 ; GFX9X-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_2
70 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
72 ; GFX12-LABEL: test_cvt_f32_bf8_byte2:
74 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
75 ; GFX12-NEXT: s_wait_expcnt 0x0
76 ; GFX12-NEXT: s_wait_samplecnt 0x0
77 ; GFX12-NEXT: s_wait_bvhcnt 0x0
78 ; GFX12-NEXT: s_wait_kmcnt 0x0
79 ; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 byte_sel:2
80 ; GFX12-NEXT: s_setpc_b64 s[30:31]
81 %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 2)
; Calls llvm.amdgcn.cvt.f32.bf8 on %a with selector operand 3.
; Expected: both gfx9 targets emit the SDWA form with src0_sel BYTE_3;
; gfx1200 emits the e64 form with byte_sel 3.
85 define float @test_cvt_f32_bf8_byte3(i32 %a) {
86 ; GFX9X-LABEL: test_cvt_f32_bf8_byte3:
88 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89 ; GFX9X-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_3
90 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
92 ; GFX12-LABEL: test_cvt_f32_bf8_byte3:
94 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
95 ; GFX12-NEXT: s_wait_expcnt 0x0
96 ; GFX12-NEXT: s_wait_samplecnt 0x0
97 ; GFX12-NEXT: s_wait_bvhcnt 0x0
98 ; GFX12-NEXT: s_wait_kmcnt 0x0
99 ; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 byte_sel:3
100 ; GFX12-NEXT: s_setpc_b64 s[30:31]
101 %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 3)
; Calls llvm.amdgcn.cvt.f32.fp8 on %a with selector operand 0.
; Expected output differs per target: gfx940 uses the SDWA form with
; src0_sel BYTE_0, while gfx950 and gfx1200 use the plain e32 form with no
; selector modifier, hence the separate gfx940/gfx950 check prefixes.
105 define float @test_cvt_f32_fp8_byte0(i32 %a) {
106 ; GFX940-LABEL: test_cvt_f32_fp8_byte0:
108 ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109 ; GFX940-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_0
110 ; GFX940-NEXT: s_setpc_b64 s[30:31]
112 ; GFX950-LABEL: test_cvt_f32_fp8_byte0:
114 ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115 ; GFX950-NEXT: v_cvt_f32_fp8_e32 v0, v0
116 ; GFX950-NEXT: s_setpc_b64 s[30:31]
118 ; GFX12-LABEL: test_cvt_f32_fp8_byte0:
120 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
121 ; GFX12-NEXT: s_wait_expcnt 0x0
122 ; GFX12-NEXT: s_wait_samplecnt 0x0
123 ; GFX12-NEXT: s_wait_bvhcnt 0x0
124 ; GFX12-NEXT: s_wait_kmcnt 0x0
125 ; GFX12-NEXT: v_cvt_f32_fp8_e32 v0, v0
126 ; GFX12-NEXT: s_setpc_b64 s[30:31]
127 %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 0)
; Calls llvm.amdgcn.cvt.f32.fp8 on %a with selector operand 1.
; Expected: both gfx9 targets emit the SDWA form with src0_sel BYTE_1;
; gfx1200 emits the e64 form with byte_sel 1.
131 define float @test_cvt_f32_fp8_byte1(i32 %a) {
132 ; GFX9X-LABEL: test_cvt_f32_fp8_byte1:
134 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
135 ; GFX9X-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
136 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
138 ; GFX12-LABEL: test_cvt_f32_fp8_byte1:
140 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
141 ; GFX12-NEXT: s_wait_expcnt 0x0
142 ; GFX12-NEXT: s_wait_samplecnt 0x0
143 ; GFX12-NEXT: s_wait_bvhcnt 0x0
144 ; GFX12-NEXT: s_wait_kmcnt 0x0
145 ; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:1
146 ; GFX12-NEXT: s_setpc_b64 s[30:31]
147 %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
; Calls llvm.amdgcn.cvt.f32.fp8 on %a with selector operand 2.
; Expected: both gfx9 targets emit the SDWA form with src0_sel BYTE_2;
; gfx1200 emits the e64 form with byte_sel 2.
151 define float @test_cvt_f32_fp8_byte2(i32 %a) {
152 ; GFX9X-LABEL: test_cvt_f32_fp8_byte2:
154 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155 ; GFX9X-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_2
156 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
158 ; GFX12-LABEL: test_cvt_f32_fp8_byte2:
160 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
161 ; GFX12-NEXT: s_wait_expcnt 0x0
162 ; GFX12-NEXT: s_wait_samplecnt 0x0
163 ; GFX12-NEXT: s_wait_bvhcnt 0x0
164 ; GFX12-NEXT: s_wait_kmcnt 0x0
165 ; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:2
166 ; GFX12-NEXT: s_setpc_b64 s[30:31]
167 %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 2)
; Calls llvm.amdgcn.cvt.f32.fp8 on %a with selector operand 3.
; Expected: both gfx9 targets emit the SDWA form with src0_sel BYTE_3;
; gfx1200 emits the e64 form with byte_sel 3.
171 define float @test_cvt_f32_fp8_byte3(i32 %a) {
172 ; GFX9X-LABEL: test_cvt_f32_fp8_byte3:
174 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175 ; GFX9X-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_3
176 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
178 ; GFX12-LABEL: test_cvt_f32_fp8_byte3:
180 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
181 ; GFX12-NEXT: s_wait_expcnt 0x0
182 ; GFX12-NEXT: s_wait_samplecnt 0x0
183 ; GFX12-NEXT: s_wait_bvhcnt 0x0
184 ; GFX12-NEXT: s_wait_kmcnt 0x0
185 ; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:3
186 ; GFX12-NEXT: s_setpc_b64 s[30:31]
187 %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 3)
; Calls llvm.amdgcn.cvt.pk.f32.bf8 on %a with the i1 operand set to false.
; Expected: every target emits the plain e32 form writing the register pair
; v[0:1], with no word-select modifier.
191 define <2 x float> @test_cvt_pk_f32_bf8_word0(i32 %a) {
192 ; GFX9X-LABEL: test_cvt_pk_f32_bf8_word0:
194 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
195 ; GFX9X-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0
196 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
198 ; GFX12-LABEL: test_cvt_pk_f32_bf8_word0:
200 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
201 ; GFX12-NEXT: s_wait_expcnt 0x0
202 ; GFX12-NEXT: s_wait_samplecnt 0x0
203 ; GFX12-NEXT: s_wait_bvhcnt 0x0
204 ; GFX12-NEXT: s_wait_kmcnt 0x0
205 ; GFX12-NEXT: v_cvt_pk_f32_bf8_e32 v[0:1], v0
206 ; GFX12-NEXT: s_setpc_b64 s[30:31]
207 %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
; Calls llvm.amdgcn.cvt.pk.f32.bf8 on %a with the i1 operand set to true.
; Expected: the gfx9 targets emit the SDWA form with src0_sel WORD_1;
; gfx1200 emits the e64 form with op_sel [1,0].
211 define <2 x float> @test_cvt_pk_f32_bf8_word1(i32 %a) {
212 ; GFX9X-LABEL: test_cvt_pk_f32_bf8_word1:
214 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215 ; GFX9X-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
216 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
218 ; GFX12-LABEL: test_cvt_pk_f32_bf8_word1:
220 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
221 ; GFX12-NEXT: s_wait_expcnt 0x0
222 ; GFX12-NEXT: s_wait_samplecnt 0x0
223 ; GFX12-NEXT: s_wait_bvhcnt 0x0
224 ; GFX12-NEXT: s_wait_kmcnt 0x0
225 ; GFX12-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
226 ; GFX12-NEXT: s_setpc_b64 s[30:31]
227 %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 true)
; Calls llvm.amdgcn.cvt.pk.f32.fp8 on %a with the i1 operand set to false.
; Expected: every target emits the plain e32 form writing the register pair
; v[0:1], with no word-select modifier.
231 define <2 x float> @test_cvt_pk_f32_fp8_word0(i32 %a) {
232 ; GFX9X-LABEL: test_cvt_pk_f32_fp8_word0:
234 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
235 ; GFX9X-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
236 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
238 ; GFX12-LABEL: test_cvt_pk_f32_fp8_word0:
240 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
241 ; GFX12-NEXT: s_wait_expcnt 0x0
242 ; GFX12-NEXT: s_wait_samplecnt 0x0
243 ; GFX12-NEXT: s_wait_bvhcnt 0x0
244 ; GFX12-NEXT: s_wait_kmcnt 0x0
245 ; GFX12-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
246 ; GFX12-NEXT: s_setpc_b64 s[30:31]
247 %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 false)
; Calls llvm.amdgcn.cvt.pk.f32.fp8 on %a with the i1 operand set to true.
; Expected: the gfx9 targets emit the SDWA form with src0_sel WORD_1;
; gfx1200 emits the e64 form with op_sel [1,0].
251 define <2 x float> @test_cvt_pk_f32_fp8_word1(i32 %a) {
252 ; GFX9X-LABEL: test_cvt_pk_f32_fp8_word1:
254 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
255 ; GFX9X-NEXT: v_cvt_pk_f32_fp8_sdwa v[0:1], v0 src0_sel:WORD_1
256 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
258 ; GFX12-LABEL: test_cvt_pk_f32_fp8_word1:
260 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
261 ; GFX12-NEXT: s_wait_expcnt 0x0
262 ; GFX12-NEXT: s_wait_samplecnt 0x0
263 ; GFX12-NEXT: s_wait_bvhcnt 0x0
264 ; GFX12-NEXT: s_wait_kmcnt 0x0
265 ; GFX12-NEXT: v_cvt_pk_f32_fp8_e64 v[0:1], v0 op_sel:[1,0]
266 ; GFX12-NEXT: s_setpc_b64 s[30:31]
267 %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
; Calls llvm.amdgcn.cvt.pk.bf8.f32 on (%x, %y, %old) with the i1 operand false.
; Expected: the convert writes v2 with no op_sel modifier, and v2 is then
; copied into v0. v2 presumably holds %old as the third argument; confirm
; against the calling convention.
; gfx1200 additionally expects an s_delay_alu between the convert and the copy.
271 define i32 @test_cvt_pk_bf8_f32_word0(float %x, float %y, i32 %old) {
272 ; GFX9X-LABEL: test_cvt_pk_bf8_f32_word0:
274 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
275 ; GFX9X-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1
276 ; GFX9X-NEXT: v_mov_b32_e32 v0, v2
277 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
279 ; GFX12-LABEL: test_cvt_pk_bf8_f32_word0:
281 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
282 ; GFX12-NEXT: s_wait_expcnt 0x0
283 ; GFX12-NEXT: s_wait_samplecnt 0x0
284 ; GFX12-NEXT: s_wait_bvhcnt 0x0
285 ; GFX12-NEXT: s_wait_kmcnt 0x0
286 ; GFX12-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1
287 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
288 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
289 ; GFX12-NEXT: s_setpc_b64 s[30:31]
290 %ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %x, float %y, i32 %old, i1 false)
; Calls llvm.amdgcn.cvt.pk.bf8.f32 on (%x, %y, %old) with the i1 operand true.
; Expected: the convert writes v2 with op_sel [0,0,1] on all targets, then v2
; is copied into v0.
; Unlike the word0 variant, the gfx9 checks expect an s_nop 0 between the
; convert and the copy (presumably a hazard workaround; confirm in the backend).
; gfx1200 expects an s_delay_alu in that position instead.
294 define i32 @test_cvt_pk_bf8_f32_word1(float %x, float %y, i32 %old) {
295 ; GFX9X-LABEL: test_cvt_pk_bf8_f32_word1:
297 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
298 ; GFX9X-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1]
299 ; GFX9X-NEXT: s_nop 0
300 ; GFX9X-NEXT: v_mov_b32_e32 v0, v2
301 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
303 ; GFX12-LABEL: test_cvt_pk_bf8_f32_word1:
305 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
306 ; GFX12-NEXT: s_wait_expcnt 0x0
307 ; GFX12-NEXT: s_wait_samplecnt 0x0
308 ; GFX12-NEXT: s_wait_bvhcnt 0x0
309 ; GFX12-NEXT: s_wait_kmcnt 0x0
310 ; GFX12-NEXT: v_cvt_pk_bf8_f32 v2, v0, v1 op_sel:[0,0,1]
311 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
312 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
313 ; GFX12-NEXT: s_setpc_b64 s[30:31]
314 %ret = tail call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %x, float %y, i32 %old, i1 true)
; Calls llvm.amdgcn.cvt.pk.fp8.f32 on (%x, %y, %old) with the i1 operand false.
; Expected: the convert writes v2 with no op_sel modifier, and v2 is then
; copied into v0. v2 presumably holds %old as the third argument; confirm
; against the calling convention.
; gfx1200 additionally expects an s_delay_alu between the convert and the copy.
318 define i32 @test_cvt_pk_fp8_f32_word0(float %x, float %y, i32 %old) {
319 ; GFX9X-LABEL: test_cvt_pk_fp8_f32_word0:
321 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; GFX9X-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1
323 ; GFX9X-NEXT: v_mov_b32_e32 v0, v2
324 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
326 ; GFX12-LABEL: test_cvt_pk_fp8_f32_word0:
328 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
329 ; GFX12-NEXT: s_wait_expcnt 0x0
330 ; GFX12-NEXT: s_wait_samplecnt 0x0
331 ; GFX12-NEXT: s_wait_bvhcnt 0x0
332 ; GFX12-NEXT: s_wait_kmcnt 0x0
333 ; GFX12-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1
334 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
335 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
336 ; GFX12-NEXT: s_setpc_b64 s[30:31]
337 %ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %x, float %y, i32 %old, i1 false)
; Calls llvm.amdgcn.cvt.pk.fp8.f32 on (%x, %y, %old) with the i1 operand true.
; Expected: the convert writes v2 with op_sel [0,0,1] on all targets, then v2
; is copied into v0.
; Unlike the word0 variant, the gfx9 checks expect an s_nop 0 between the
; convert and the copy (presumably a hazard workaround; confirm in the backend).
; gfx1200 expects an s_delay_alu in that position instead.
341 define i32 @test_cvt_pk_fp8_f32_word1(float %x, float %y, i32 %old) {
342 ; GFX9X-LABEL: test_cvt_pk_fp8_f32_word1:
344 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345 ; GFX9X-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
346 ; GFX9X-NEXT: s_nop 0
347 ; GFX9X-NEXT: v_mov_b32_e32 v0, v2
348 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
350 ; GFX12-LABEL: test_cvt_pk_fp8_f32_word1:
352 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
353 ; GFX12-NEXT: s_wait_expcnt 0x0
354 ; GFX12-NEXT: s_wait_samplecnt 0x0
355 ; GFX12-NEXT: s_wait_bvhcnt 0x0
356 ; GFX12-NEXT: s_wait_kmcnt 0x0
357 ; GFX12-NEXT: v_cvt_pk_fp8_f32 v2, v0, v1 op_sel:[0,0,1]
358 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
359 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
360 ; GFX12-NEXT: s_setpc_b64 s[30:31]
361 %ret = tail call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %x, float %y, i32 %old, i1 true)
; Calls llvm.amdgcn.cvt.sr.bf8.f32 on (%x, %r, %old) with selector operand 0.
; Expected: the convert writes v2 with no selector modifier on any target,
; and v2 is then copied into v0.
; gfx1200 additionally expects an s_delay_alu between the convert and the copy.
365 define i32 @test_cvt_sr_bf8_f32_byte0(float %x, i32 %r, i32 %old) {
366 ; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte0:
368 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369 ; GFX9X-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1
370 ; GFX9X-NEXT: v_mov_b32_e32 v0, v2
371 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
373 ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte0:
375 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
376 ; GFX12-NEXT: s_wait_expcnt 0x0
377 ; GFX12-NEXT: s_wait_samplecnt 0x0
378 ; GFX12-NEXT: s_wait_bvhcnt 0x0
379 ; GFX12-NEXT: s_wait_kmcnt 0x0
380 ; GFX12-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1
381 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
382 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
383 ; GFX12-NEXT: s_setpc_b64 s[30:31]
384 %ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 0)
; Calls llvm.amdgcn.cvt.sr.bf8.f32 on (%x, %r, %old) with selector operand 1.
; Expected: the gfx9 targets encode the selector as op_sel [0,0,1,0] and
; place an s_nop 0 before the copy from v2 to v0 (presumably a hazard
; workaround; confirm in the backend).
; gfx1200 encodes it as byte_sel 1 and uses an s_delay_alu instead.
388 define i32 @test_cvt_sr_bf8_f32_byte1(float %x, i32 %r, i32 %old) {
389 ; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte1:
391 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
392 ; GFX9X-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,0]
393 ; GFX9X-NEXT: s_nop 0
394 ; GFX9X-NEXT: v_mov_b32_e32 v0, v2
395 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
397 ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte1:
399 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
400 ; GFX12-NEXT: s_wait_expcnt 0x0
401 ; GFX12-NEXT: s_wait_samplecnt 0x0
402 ; GFX12-NEXT: s_wait_bvhcnt 0x0
403 ; GFX12-NEXT: s_wait_kmcnt 0x0
404 ; GFX12-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 byte_sel:1
405 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
406 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
407 ; GFX12-NEXT: s_setpc_b64 s[30:31]
408 %ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 1)
; Calls llvm.amdgcn.cvt.sr.bf8.f32 on (%x, %r, %old) with selector operand 2.
; Expected: the gfx9 targets encode the selector as op_sel [0,0,0,1] and
; place an s_nop 0 before the copy from v2 to v0 (presumably a hazard
; workaround; confirm in the backend).
; gfx1200 encodes it as byte_sel 2 and uses an s_delay_alu instead.
412 define i32 @test_cvt_sr_bf8_f32_byte2(float %x, i32 %r, i32 %old) {
413 ; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte2:
415 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416 ; GFX9X-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,0,1]
417 ; GFX9X-NEXT: s_nop 0
418 ; GFX9X-NEXT: v_mov_b32_e32 v0, v2
419 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
421 ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte2:
423 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
424 ; GFX12-NEXT: s_wait_expcnt 0x0
425 ; GFX12-NEXT: s_wait_samplecnt 0x0
426 ; GFX12-NEXT: s_wait_bvhcnt 0x0
427 ; GFX12-NEXT: s_wait_kmcnt 0x0
428 ; GFX12-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 byte_sel:2
429 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
430 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
431 ; GFX12-NEXT: s_setpc_b64 s[30:31]
432 %ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 2)
; Calls llvm.amdgcn.cvt.sr.bf8.f32 on (%x, %r, %old) with selector operand 3.
; Expected: the gfx9 targets encode the selector as op_sel [0,0,1,1] and
; place an s_nop 0 before the copy from v2 to v0 (presumably a hazard
; workaround; confirm in the backend).
; gfx1200 encodes it as byte_sel 3 and uses an s_delay_alu instead.
436 define i32 @test_cvt_sr_bf8_f32_byte3(float %x, i32 %r, i32 %old) {
437 ; GFX9X-LABEL: test_cvt_sr_bf8_f32_byte3:
439 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
440 ; GFX9X-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,1]
441 ; GFX9X-NEXT: s_nop 0
442 ; GFX9X-NEXT: v_mov_b32_e32 v0, v2
443 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
445 ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte3:
447 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
448 ; GFX12-NEXT: s_wait_expcnt 0x0
449 ; GFX12-NEXT: s_wait_samplecnt 0x0
450 ; GFX12-NEXT: s_wait_bvhcnt 0x0
451 ; GFX12-NEXT: s_wait_kmcnt 0x0
452 ; GFX12-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 byte_sel:3
453 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
454 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
455 ; GFX12-NEXT: s_setpc_b64 s[30:31]
456 %ret = tail call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %x, i32 %r, i32 %old, i32 3)
; Calls llvm.amdgcn.cvt.sr.fp8.f32 on (%x, %r, %old) with selector operand 0.
; Expected: the convert writes v2 with no selector modifier on any target,
; and v2 is then copied into v0.
; gfx1200 additionally expects an s_delay_alu between the convert and the copy.
460 define i32 @test_cvt_sr_fp8_f32_byte0(float %x, i32 %r, i32 %old) {
461 ; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte0:
463 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
464 ; GFX9X-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1
465 ; GFX9X-NEXT: v_mov_b32_e32 v0, v2
466 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
468 ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte0:
470 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
471 ; GFX12-NEXT: s_wait_expcnt 0x0
472 ; GFX12-NEXT: s_wait_samplecnt 0x0
473 ; GFX12-NEXT: s_wait_bvhcnt 0x0
474 ; GFX12-NEXT: s_wait_kmcnt 0x0
475 ; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1
476 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
477 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
478 ; GFX12-NEXT: s_setpc_b64 s[30:31]
479 %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 0)
; Calls llvm.amdgcn.cvt.sr.fp8.f32 on (%x, %r, %old) with selector operand 1.
; Expected: the gfx9 targets encode the selector as op_sel [0,0,1,0] and
; place an s_nop 0 before the copy from v2 to v0 (presumably a hazard
; workaround; confirm in the backend).
; gfx1200 encodes it as byte_sel 1 and uses an s_delay_alu instead.
483 define i32 @test_cvt_sr_fp8_f32_byte1(float %x, i32 %r, i32 %old) {
484 ; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte1:
486 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
487 ; GFX9X-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,0]
488 ; GFX9X-NEXT: s_nop 0
489 ; GFX9X-NEXT: v_mov_b32_e32 v0, v2
490 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
492 ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte1:
494 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
495 ; GFX12-NEXT: s_wait_expcnt 0x0
496 ; GFX12-NEXT: s_wait_samplecnt 0x0
497 ; GFX12-NEXT: s_wait_bvhcnt 0x0
498 ; GFX12-NEXT: s_wait_kmcnt 0x0
499 ; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 byte_sel:1
500 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
501 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
502 ; GFX12-NEXT: s_setpc_b64 s[30:31]
503 %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 1)
; Calls llvm.amdgcn.cvt.sr.fp8.f32 on (%x, %r, %old) with selector operand 2.
; Expected: the gfx9 targets encode the selector as op_sel [0,0,0,1] and
; place an s_nop 0 before the copy from v2 to v0 (presumably a hazard
; workaround; confirm in the backend).
; gfx1200 encodes it as byte_sel 2 and uses an s_delay_alu instead.
507 define i32 @test_cvt_sr_fp8_f32_byte2(float %x, i32 %r, i32 %old) {
508 ; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte2:
510 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
511 ; GFX9X-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,0,1]
512 ; GFX9X-NEXT: s_nop 0
513 ; GFX9X-NEXT: v_mov_b32_e32 v0, v2
514 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
516 ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte2:
518 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
519 ; GFX12-NEXT: s_wait_expcnt 0x0
520 ; GFX12-NEXT: s_wait_samplecnt 0x0
521 ; GFX12-NEXT: s_wait_bvhcnt 0x0
522 ; GFX12-NEXT: s_wait_kmcnt 0x0
523 ; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 byte_sel:2
524 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
525 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
526 ; GFX12-NEXT: s_setpc_b64 s[30:31]
527 %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 2)
; Calls llvm.amdgcn.cvt.sr.fp8.f32 on (%x, %r, %old) with selector operand 3.
; Expected: the gfx9 targets encode the selector as op_sel [0,0,1,1] and
; place an s_nop 0 before the copy from v2 to v0 (presumably a hazard
; workaround; confirm in the backend).
; gfx1200 encodes it as byte_sel 3 and uses an s_delay_alu instead.
531 define i32 @test_cvt_sr_fp8_f32_byte3(float %x, i32 %r, i32 %old) {
532 ; GFX9X-LABEL: test_cvt_sr_fp8_f32_byte3:
534 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535 ; GFX9X-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,1]
536 ; GFX9X-NEXT: s_nop 0
537 ; GFX9X-NEXT: v_mov_b32_e32 v0, v2
538 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
540 ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte3:
542 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
543 ; GFX12-NEXT: s_wait_expcnt 0x0
544 ; GFX12-NEXT: s_wait_samplecnt 0x0
545 ; GFX12-NEXT: s_wait_bvhcnt 0x0
546 ; GFX12-NEXT: s_wait_kmcnt 0x0
547 ; GFX12-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 byte_sel:3
548 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
549 ; GFX12-NEXT: v_mov_b32_e32 v0, v2
550 ; GFX12-NEXT: s_setpc_b64 s[30:31]
551 %ret = tail call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %x, i32 %r, i32 %old, i32 3)
; Sign-extends an i16 argument to i32 and feeds it to llvm.amdgcn.cvt.f32.fp8
; with selector operand 1.
; Expected: the sign extension stays as an explicit v_bfe_i32 (offset 0,
; width 16) ahead of the convert on every target. The convert then selects
; byte 1 (SDWA src0_sel BYTE_1 on gfx9, byte_sel 1 on gfx1200).
555 define float @test_sext_cvt_f32_fp8(i16 %a) {
556 ; GFX9X-LABEL: test_sext_cvt_f32_fp8:
558 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
559 ; GFX9X-NEXT: v_bfe_i32 v0, v0, 0, 16
560 ; GFX9X-NEXT: v_cvt_f32_fp8_sdwa v0, v0 src0_sel:BYTE_1
561 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
563 ; GFX12-LABEL: test_sext_cvt_f32_fp8:
565 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
566 ; GFX12-NEXT: s_wait_expcnt 0x0
567 ; GFX12-NEXT: s_wait_samplecnt 0x0
568 ; GFX12-NEXT: s_wait_bvhcnt 0x0
569 ; GFX12-NEXT: s_wait_kmcnt 0x0
570 ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
571 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
572 ; GFX12-NEXT: v_cvt_f32_fp8_e64 v0, v0 byte_sel:1
573 ; GFX12-NEXT: s_setpc_b64 s[30:31]
574 %a.sext = sext i16 %a to i32
575 %ret = tail call float @llvm.amdgcn.cvt.f32.fp8(i32 %a.sext, i32 1)
; Sign-extends an i16 argument to i32 and feeds it to llvm.amdgcn.cvt.f32.bf8
; with selector operand 1.
; Expected: the sign extension stays as an explicit v_bfe_i32 (offset 0,
; width 16) ahead of the convert on every target. The convert then selects
; byte 1 (SDWA src0_sel BYTE_1 on gfx9, byte_sel 1 on gfx1200).
579 define float @test_sext_cvt_f32_bf8(i16 %a) {
580 ; GFX9X-LABEL: test_sext_cvt_f32_bf8:
582 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
583 ; GFX9X-NEXT: v_bfe_i32 v0, v0, 0, 16
584 ; GFX9X-NEXT: v_cvt_f32_bf8_sdwa v0, v0 src0_sel:BYTE_1
585 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
587 ; GFX12-LABEL: test_sext_cvt_f32_bf8:
589 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
590 ; GFX12-NEXT: s_wait_expcnt 0x0
591 ; GFX12-NEXT: s_wait_samplecnt 0x0
592 ; GFX12-NEXT: s_wait_bvhcnt 0x0
593 ; GFX12-NEXT: s_wait_kmcnt 0x0
594 ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
595 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
596 ; GFX12-NEXT: v_cvt_f32_bf8_e64 v0, v0 byte_sel:1
597 ; GFX12-NEXT: s_setpc_b64 s[30:31]
598 %a.sext = sext i16 %a to i32
599 %ret = tail call float @llvm.amdgcn.cvt.f32.bf8(i32 %a.sext, i32 1)
; Sign-extends an i16 argument to i32 and feeds it to
; llvm.amdgcn.cvt.pk.f32.bf8 with the i1 operand set to true.
; Expected: the sign extension stays as an explicit v_bfe_i32 (offset 0,
; width 16) ahead of the convert on every target. The convert then selects
; the upper word (SDWA src0_sel WORD_1 on gfx9, op_sel [1,0] on gfx1200).
603 define <2 x float> @test_sext_cvt_pk_f32_bf8_word1(i16 %a) {
604 ; GFX9X-LABEL: test_sext_cvt_pk_f32_bf8_word1:
606 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
607 ; GFX9X-NEXT: v_bfe_i32 v0, v0, 0, 16
608 ; GFX9X-NEXT: v_cvt_pk_f32_bf8_sdwa v[0:1], v0 src0_sel:WORD_1
609 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
611 ; GFX12-LABEL: test_sext_cvt_pk_f32_bf8_word1:
613 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
614 ; GFX12-NEXT: s_wait_expcnt 0x0
615 ; GFX12-NEXT: s_wait_samplecnt 0x0
616 ; GFX12-NEXT: s_wait_bvhcnt 0x0
617 ; GFX12-NEXT: s_wait_kmcnt 0x0
618 ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
619 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
620 ; GFX12-NEXT: v_cvt_pk_f32_bf8_e64 v[0:1], v0 op_sel:[1,0]
621 ; GFX12-NEXT: s_setpc_b64 s[30:31]
622 %a.sext = sext i16 %a to i32
623 %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a.sext, i1 true)
; Sign-extends an i16 argument to i32 and feeds it to
; llvm.amdgcn.cvt.pk.f32.fp8 with the i1 operand set to false.
; Expected: an explicit v_bfe_i32 (offset 0, width 16) is still emitted ahead
; of the convert on every target, and the convert uses the plain e32 form
; with no word-select modifier.
627 define <2 x float> @test_sext_cvt_pk_f32_fp8_word0(i16 %a) {
628 ; GFX9X-LABEL: test_sext_cvt_pk_f32_fp8_word0:
630 ; GFX9X-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
631 ; GFX9X-NEXT: v_bfe_i32 v0, v0, 0, 16
632 ; GFX9X-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
633 ; GFX9X-NEXT: s_setpc_b64 s[30:31]
635 ; GFX12-LABEL: test_sext_cvt_pk_f32_fp8_word0:
637 ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
638 ; GFX12-NEXT: s_wait_expcnt 0x0
639 ; GFX12-NEXT: s_wait_samplecnt 0x0
640 ; GFX12-NEXT: s_wait_bvhcnt 0x0
641 ; GFX12-NEXT: s_wait_kmcnt 0x0
642 ; GFX12-NEXT: v_bfe_i32 v0, v0, 0, 16
643 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
644 ; GFX12-NEXT: v_cvt_pk_f32_fp8_e32 v[0:1], v0
645 ; GFX12-NEXT: s_setpc_b64 s[30:31]
646 %a.sext = sext i16 %a to i32
647 %ret = tail call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a.sext, i1 false)