1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX950 %s
3 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX950 %s
; Intrinsics called by the tests below. Each one returns i32 and takes
; (i32 %old, <src> %src, i32 %seed, float %scale, i32 %dst_sel); the type of
; %src is bfloat, half or float, matching the last component of the name.
; NOTE(review): every call visible here passes a constant 0-3 for %dst_sel,
; so it is presumably an immediate operand - confirm against the intrinsic
; definition.
5 declare i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 %dst_sel)
6 declare i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 %dst_sel)
7 declare i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 %dst_sel)
8 declare i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 %dst_sel)
9 declare i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 %dst_sel)
10 declare i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 %dst_sel)
; bf8 result from a bfloat source, dst_sel = 0. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_bf8_bf16 with no op_sel modifier.
12 define amdgpu_ps void @test_cvt_scalef32_sr_bf8_bf16_dst_sel_0(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
13 ; GFX950-LABEL: test_cvt_scalef32_sr_bf8_bf16_dst_sel_0:
15 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
16 ; GFX950-NEXT: s_waitcnt vmcnt(0)
17 ; GFX950-NEXT: v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4
18 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
19 ; GFX950-NEXT: s_endpgm
20 %old = load i32, ptr addrspace(1) %out, align 4
21 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 0)
22 store i32 %cvt, ptr addrspace(1) %out, align 4
; bf8 result from a bfloat source, dst_sel = 1. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_bf8_bf16 to carry op_sel:[0,0,1,0].
26 define amdgpu_ps void @test_cvt_scalef32_sr_bf8_bf16_dst_sel_1(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
27 ; GFX950-LABEL: test_cvt_scalef32_sr_bf8_bf16_dst_sel_1:
29 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
30 ; GFX950-NEXT: s_waitcnt vmcnt(0)
31 ; GFX950-NEXT: v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,0]
32 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
33 ; GFX950-NEXT: s_endpgm
34 %old = load i32, ptr addrspace(1) %out, align 4
35 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 1)
36 store i32 %cvt, ptr addrspace(1) %out, align 4
; bf8 result from a bfloat source, dst_sel = 2. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_bf8_bf16 to carry op_sel:[0,0,0,1].
40 define amdgpu_ps void @test_cvt_scalef32_sr_bf8_bf16_dst_sel_2(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
41 ; GFX950-LABEL: test_cvt_scalef32_sr_bf8_bf16_dst_sel_2:
43 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
44 ; GFX950-NEXT: s_waitcnt vmcnt(0)
45 ; GFX950-NEXT: v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4 op_sel:[0,0,0,1]
46 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
47 ; GFX950-NEXT: s_endpgm
48 %old = load i32, ptr addrspace(1) %out, align 4
49 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 2)
50 store i32 %cvt, ptr addrspace(1) %out, align 4
; bf8 result from a bfloat source, dst_sel = 3. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_bf8_bf16 to carry op_sel:[0,0,1,1].
54 define amdgpu_ps void @test_cvt_scalef32_sr_bf8_bf16_dst_sel_3(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
55 ; GFX950-LABEL: test_cvt_scalef32_sr_bf8_bf16_dst_sel_3:
57 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
58 ; GFX950-NEXT: s_waitcnt vmcnt(0)
59 ; GFX950-NEXT: v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,1]
60 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
61 ; GFX950-NEXT: s_endpgm
62 %old = load i32, ptr addrspace(1) %out, align 4
63 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 3)
64 store i32 %cvt, ptr addrspace(1) %out, align 4
; bf8 result from a half source, dst_sel = 0. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_bf8_f16 with no op_sel modifier.
68 define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f16_dst_sel_0(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
69 ; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f16_dst_sel_0:
71 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
72 ; GFX950-NEXT: s_waitcnt vmcnt(0)
73 ; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4
74 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
75 ; GFX950-NEXT: s_endpgm
76 %old = load i32, ptr addrspace(1) %out, align 4
77 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 0)
78 store i32 %cvt, ptr addrspace(1) %out, align 4
; bf8 result from a half source, dst_sel = 1. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_bf8_f16 to carry op_sel:[0,0,1,0].
82 define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f16_dst_sel_1(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
83 ; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f16_dst_sel_1:
85 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
86 ; GFX950-NEXT: s_waitcnt vmcnt(0)
87 ; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4 op_sel:[0,0,1,0]
88 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
89 ; GFX950-NEXT: s_endpgm
90 %old = load i32, ptr addrspace(1) %out, align 4
91 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 1)
92 store i32 %cvt, ptr addrspace(1) %out, align 4
; bf8 result from a half source, dst_sel = 2. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_bf8_f16 to carry op_sel:[0,0,0,1].
96 define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f16_dst_sel_2(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
97 ; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f16_dst_sel_2:
99 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
100 ; GFX950-NEXT: s_waitcnt vmcnt(0)
101 ; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4 op_sel:[0,0,0,1]
102 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
103 ; GFX950-NEXT: s_endpgm
104 %old = load i32, ptr addrspace(1) %out, align 4
105 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 2)
106 store i32 %cvt, ptr addrspace(1) %out, align 4
; bf8 result from a half source, dst_sel = 3. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_bf8_f16 to carry op_sel:[0,0,1,1].
110 define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f16_dst_sel_3(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
111 ; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f16_dst_sel_3:
113 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
114 ; GFX950-NEXT: s_waitcnt vmcnt(0)
115 ; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4 op_sel:[0,0,1,1]
116 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
117 ; GFX950-NEXT: s_endpgm
118 %old = load i32, ptr addrspace(1) %out, align 4
119 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 3)
120 store i32 %cvt, ptr addrspace(1) %out, align 4
; bf8 result from a float source, dst_sel = 0. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_bf8_f32 with no op_sel modifier.
124 define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f32_dst_sel_0(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
125 ; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f32_dst_sel_0:
127 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
128 ; GFX950-NEXT: s_waitcnt vmcnt(0)
129 ; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4
130 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
131 ; GFX950-NEXT: s_endpgm
132 %old = load i32, ptr addrspace(1) %out, align 4
133 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 0)
134 store i32 %cvt, ptr addrspace(1) %out, align 4
; bf8 result from a float source, dst_sel = 1. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_bf8_f32 to carry op_sel:[0,0,1,0].
138 define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f32_dst_sel_1(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
139 ; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f32_dst_sel_1:
141 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
142 ; GFX950-NEXT: s_waitcnt vmcnt(0)
143 ; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4 op_sel:[0,0,1,0]
144 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
145 ; GFX950-NEXT: s_endpgm
146 %old = load i32, ptr addrspace(1) %out, align 4
147 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 1)
148 store i32 %cvt, ptr addrspace(1) %out, align 4
; bf8 result from a float source, dst_sel = 2. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_bf8_f32 to carry op_sel:[0,0,0,1].
152 define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f32_dst_sel_2(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
153 ; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f32_dst_sel_2:
155 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
156 ; GFX950-NEXT: s_waitcnt vmcnt(0)
157 ; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4 op_sel:[0,0,0,1]
158 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
159 ; GFX950-NEXT: s_endpgm
160 %old = load i32, ptr addrspace(1) %out, align 4
161 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 2)
162 store i32 %cvt, ptr addrspace(1) %out, align 4
; bf8 result from a float source, dst_sel = 3. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_bf8_f32 to carry op_sel:[0,0,1,1].
166 define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f32_dst_sel_3(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
167 ; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f32_dst_sel_3:
169 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
170 ; GFX950-NEXT: s_waitcnt vmcnt(0)
171 ; GFX950-NEXT: v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4 op_sel:[0,0,1,1]
172 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
173 ; GFX950-NEXT: s_endpgm
174 %old = load i32, ptr addrspace(1) %out, align 4
175 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 3)
176 store i32 %cvt, ptr addrspace(1) %out, align 4
; fp8 result from a bfloat source, dst_sel = 0. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_fp8_bf16 with no op_sel modifier.
180 define amdgpu_ps void @test_cvt_scalef32_sr_fp8_bf16_dst_sel_0(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
181 ; GFX950-LABEL: test_cvt_scalef32_sr_fp8_bf16_dst_sel_0:
183 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
184 ; GFX950-NEXT: s_waitcnt vmcnt(0)
185 ; GFX950-NEXT: v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4
186 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
187 ; GFX950-NEXT: s_endpgm
188 %old = load i32, ptr addrspace(1) %out, align 4
189 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 0)
190 store i32 %cvt, ptr addrspace(1) %out, align 4
; fp8 result from a bfloat source, dst_sel = 1. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_fp8_bf16 to carry op_sel:[0,0,1,0].
194 define amdgpu_ps void @test_cvt_scalef32_sr_fp8_bf16_dst_sel_1(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
195 ; GFX950-LABEL: test_cvt_scalef32_sr_fp8_bf16_dst_sel_1:
197 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
198 ; GFX950-NEXT: s_waitcnt vmcnt(0)
199 ; GFX950-NEXT: v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,0]
200 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
201 ; GFX950-NEXT: s_endpgm
202 %old = load i32, ptr addrspace(1) %out, align 4
203 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 1)
204 store i32 %cvt, ptr addrspace(1) %out, align 4
; fp8 result from a bfloat source, dst_sel = 2. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_fp8_bf16 to carry op_sel:[0,0,0,1].
208 define amdgpu_ps void @test_cvt_scalef32_sr_fp8_bf16_dst_sel_2(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
209 ; GFX950-LABEL: test_cvt_scalef32_sr_fp8_bf16_dst_sel_2:
211 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
212 ; GFX950-NEXT: s_waitcnt vmcnt(0)
213 ; GFX950-NEXT: v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4 op_sel:[0,0,0,1]
214 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
215 ; GFX950-NEXT: s_endpgm
216 %old = load i32, ptr addrspace(1) %out, align 4
217 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 2)
218 store i32 %cvt, ptr addrspace(1) %out, align 4
; fp8 result from a bfloat source, dst_sel = 3. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_fp8_bf16 to carry op_sel:[0,0,1,1].
222 define amdgpu_ps void @test_cvt_scalef32_sr_fp8_bf16_dst_sel_3(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
223 ; GFX950-LABEL: test_cvt_scalef32_sr_fp8_bf16_dst_sel_3:
225 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
226 ; GFX950-NEXT: s_waitcnt vmcnt(0)
227 ; GFX950-NEXT: v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,1]
228 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
229 ; GFX950-NEXT: s_endpgm
230 %old = load i32, ptr addrspace(1) %out, align 4
231 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 3)
232 store i32 %cvt, ptr addrspace(1) %out, align 4
; fp8 result from a half source, dst_sel = 0. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_fp8_f16 with no op_sel modifier.
236 define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f16_dst_sel_0(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
237 ; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f16_dst_sel_0:
239 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
240 ; GFX950-NEXT: s_waitcnt vmcnt(0)
241 ; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4
242 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
243 ; GFX950-NEXT: s_endpgm
244 %old = load i32, ptr addrspace(1) %out, align 4
245 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 0)
246 store i32 %cvt, ptr addrspace(1) %out, align 4
; fp8 result from a half source, dst_sel = 1. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_fp8_f16 to carry op_sel:[0,0,1,0].
250 define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f16_dst_sel_1(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
251 ; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f16_dst_sel_1:
253 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
254 ; GFX950-NEXT: s_waitcnt vmcnt(0)
255 ; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4 op_sel:[0,0,1,0]
256 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
257 ; GFX950-NEXT: s_endpgm
258 %old = load i32, ptr addrspace(1) %out, align 4
259 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 1)
260 store i32 %cvt, ptr addrspace(1) %out, align 4
; fp8 result from a half source, dst_sel = 2. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_fp8_f16 to carry op_sel:[0,0,0,1].
264 define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f16_dst_sel_2(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
265 ; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f16_dst_sel_2:
267 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
268 ; GFX950-NEXT: s_waitcnt vmcnt(0)
269 ; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4 op_sel:[0,0,0,1]
270 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
271 ; GFX950-NEXT: s_endpgm
272 %old = load i32, ptr addrspace(1) %out, align 4
273 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 2)
274 store i32 %cvt, ptr addrspace(1) %out, align 4
; fp8 result from a half source, dst_sel = 3. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_fp8_f16 to carry op_sel:[0,0,1,1].
278 define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f16_dst_sel_3(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
279 ; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f16_dst_sel_3:
281 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
282 ; GFX950-NEXT: s_waitcnt vmcnt(0)
283 ; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4 op_sel:[0,0,1,1]
284 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
285 ; GFX950-NEXT: s_endpgm
286 %old = load i32, ptr addrspace(1) %out, align 4
287 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 3)
288 store i32 %cvt, ptr addrspace(1) %out, align 4
; fp8 result from a float source, dst_sel = 0. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_fp8_f32 with no op_sel modifier.
292 define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f32_dst_sel_0(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
293 ; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f32_dst_sel_0:
295 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
296 ; GFX950-NEXT: s_waitcnt vmcnt(0)
297 ; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4
298 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
299 ; GFX950-NEXT: s_endpgm
300 %old = load i32, ptr addrspace(1) %out, align 4
301 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 0)
302 store i32 %cvt, ptr addrspace(1) %out, align 4
; fp8 result from a float source, dst_sel = 1. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_fp8_f32 to carry op_sel:[0,0,1,0].
306 define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f32_dst_sel_1(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
307 ; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f32_dst_sel_1:
309 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
310 ; GFX950-NEXT: s_waitcnt vmcnt(0)
311 ; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4 op_sel:[0,0,1,0]
312 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
313 ; GFX950-NEXT: s_endpgm
314 %old = load i32, ptr addrspace(1) %out, align 4
315 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 1)
316 store i32 %cvt, ptr addrspace(1) %out, align 4
; fp8 result from a float source, dst_sel = 2. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_fp8_f32 to carry op_sel:[0,0,0,1].
320 define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f32_dst_sel_2(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
321 ; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f32_dst_sel_2:
323 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
324 ; GFX950-NEXT: s_waitcnt vmcnt(0)
325 ; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4 op_sel:[0,0,0,1]
326 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
327 ; GFX950-NEXT: s_endpgm
328 %old = load i32, ptr addrspace(1) %out, align 4
329 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 2)
330 store i32 %cvt, ptr addrspace(1) %out, align 4
; fp8 result from a float source, dst_sel = 3. The i32 loaded from %out is
; passed as %old and the call result is stored back to %out. The checks
; expect v_cvt_scalef32_sr_fp8_f32 to carry op_sel:[0,0,1,1].
334 define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f32_dst_sel_3(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
335 ; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f32_dst_sel_3:
337 ; GFX950-NEXT: global_load_dword v5, v[0:1], off
338 ; GFX950-NEXT: s_waitcnt vmcnt(0)
339 ; GFX950-NEXT: v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4 op_sel:[0,0,1,1]
340 ; GFX950-NEXT: global_store_dword v[0:1], v5, off
341 ; GFX950-NEXT: s_endpgm
342 %old = load i32, ptr addrspace(1) %out, align 4
343 %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 3)
344 store i32 %cvt, ptr addrspace(1) %out, align 4