1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI %s
3 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI %s
4 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11 %s
; Scalar i16 -> half: loads an i16 through %a, converts it with uitofp and
; stores the half result through %r.
; Expected lowering per run line:
;   tahiti (SI checks)     - v_cvt_f32_u32 followed by v_cvt_f16_f32.
;   fiji (VI checks)       - a single v_cvt_f16_u16.
;   gfx1100 (GFX11 checks) - a single v_cvt_f16_u16.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
6 define amdgpu_kernel void @uitofp_i16_to_f16(
7 ; SI-LABEL: uitofp_i16_to_f16:
8 ; SI: ; %bb.0: ; %entry
9 ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
10 ; SI-NEXT: s_mov_b32 s7, 0xf000
11 ; SI-NEXT: s_mov_b32 s6, -1
12 ; SI-NEXT: s_mov_b32 s10, s6
13 ; SI-NEXT: s_mov_b32 s11, s7
14 ; SI-NEXT: s_waitcnt lgkmcnt(0)
15 ; SI-NEXT: s_mov_b32 s8, s2
16 ; SI-NEXT: s_mov_b32 s9, s3
17 ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
18 ; SI-NEXT: s_mov_b32 s4, s0
19 ; SI-NEXT: s_mov_b32 s5, s1
20 ; SI-NEXT: s_waitcnt vmcnt(0)
21 ; SI-NEXT: v_cvt_f32_u32_e32 v0, v0
22 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
23 ; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
26 ; VI-LABEL: uitofp_i16_to_f16:
27 ; VI: ; %bb.0: ; %entry
28 ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
29 ; VI-NEXT: s_mov_b32 s7, 0xf000
30 ; VI-NEXT: s_mov_b32 s6, -1
31 ; VI-NEXT: s_mov_b32 s10, s6
32 ; VI-NEXT: s_mov_b32 s11, s7
33 ; VI-NEXT: s_waitcnt lgkmcnt(0)
34 ; VI-NEXT: s_mov_b32 s8, s2
35 ; VI-NEXT: s_mov_b32 s9, s3
36 ; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
37 ; VI-NEXT: s_mov_b32 s4, s0
38 ; VI-NEXT: s_mov_b32 s5, s1
39 ; VI-NEXT: s_waitcnt vmcnt(0)
40 ; VI-NEXT: v_cvt_f16_u16_e32 v0, v0
41 ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
44 ; GFX11-LABEL: uitofp_i16_to_f16:
45 ; GFX11: ; %bb.0: ; %entry
46 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
47 ; GFX11-NEXT: s_mov_b32 s6, -1
48 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
49 ; GFX11-NEXT: s_mov_b32 s10, s6
50 ; GFX11-NEXT: s_mov_b32 s11, s7
51 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
52 ; GFX11-NEXT: s_mov_b32 s8, s2
53 ; GFX11-NEXT: s_mov_b32 s9, s3
54 ; GFX11-NEXT: s_mov_b32 s4, s0
55 ; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0
56 ; GFX11-NEXT: s_mov_b32 s5, s1
57 ; GFX11-NEXT: s_waitcnt vmcnt(0)
58 ; GFX11-NEXT: v_cvt_f16_u16_e32 v0, v0
59 ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
61 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
62 ; GFX11-NEXT: s_endpgm
64 ptr addrspace(1) %a) {
66 %a.val = load i16, ptr addrspace(1) %a
67 %r.val = uitofp i16 %a.val to half
68 store half %r.val, ptr addrspace(1) %r
; Scalar i32 -> half: loads an i32 through %a, converts it with uitofp and
; stores the half result through %r.
; All three run lines (tahiti, fiji, gfx1100) expect the conversion to go
; through f32: v_cvt_f32_u32 followed by v_cvt_f16_f32. The gfx1100 checks
; additionally expect an s_delay_alu between the two conversions.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
72 define amdgpu_kernel void @uitofp_i32_to_f16(
73 ; SI-LABEL: uitofp_i32_to_f16:
74 ; SI: ; %bb.0: ; %entry
75 ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
76 ; SI-NEXT: s_mov_b32 s7, 0xf000
77 ; SI-NEXT: s_mov_b32 s6, -1
78 ; SI-NEXT: s_mov_b32 s10, s6
79 ; SI-NEXT: s_mov_b32 s11, s7
80 ; SI-NEXT: s_waitcnt lgkmcnt(0)
81 ; SI-NEXT: s_mov_b32 s8, s2
82 ; SI-NEXT: s_mov_b32 s9, s3
83 ; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
84 ; SI-NEXT: s_mov_b32 s4, s0
85 ; SI-NEXT: s_mov_b32 s5, s1
86 ; SI-NEXT: s_waitcnt vmcnt(0)
87 ; SI-NEXT: v_cvt_f32_u32_e32 v0, v0
88 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
89 ; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
92 ; VI-LABEL: uitofp_i32_to_f16:
93 ; VI: ; %bb.0: ; %entry
94 ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
95 ; VI-NEXT: s_mov_b32 s7, 0xf000
96 ; VI-NEXT: s_mov_b32 s6, -1
97 ; VI-NEXT: s_mov_b32 s10, s6
98 ; VI-NEXT: s_mov_b32 s11, s7
99 ; VI-NEXT: s_waitcnt lgkmcnt(0)
100 ; VI-NEXT: s_mov_b32 s8, s2
101 ; VI-NEXT: s_mov_b32 s9, s3
102 ; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
103 ; VI-NEXT: s_mov_b32 s4, s0
104 ; VI-NEXT: s_mov_b32 s5, s1
105 ; VI-NEXT: s_waitcnt vmcnt(0)
106 ; VI-NEXT: v_cvt_f32_u32_e32 v0, v0
107 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
108 ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
111 ; GFX11-LABEL: uitofp_i32_to_f16:
112 ; GFX11: ; %bb.0: ; %entry
113 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
114 ; GFX11-NEXT: s_mov_b32 s6, -1
115 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
116 ; GFX11-NEXT: s_mov_b32 s10, s6
117 ; GFX11-NEXT: s_mov_b32 s11, s7
118 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
119 ; GFX11-NEXT: s_mov_b32 s8, s2
120 ; GFX11-NEXT: s_mov_b32 s9, s3
121 ; GFX11-NEXT: s_mov_b32 s4, s0
122 ; GFX11-NEXT: buffer_load_b32 v0, off, s[8:11], 0
123 ; GFX11-NEXT: s_mov_b32 s5, s1
124 ; GFX11-NEXT: s_waitcnt vmcnt(0)
125 ; GFX11-NEXT: v_cvt_f32_u32_e32 v0, v0
126 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
127 ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
128 ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
129 ; GFX11-NEXT: s_nop 0
130 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
131 ; GFX11-NEXT: s_endpgm
133 ptr addrspace(1) %a) {
135 %a.val = load i32, ptr addrspace(1) %a
136 %r.val = uitofp i32 %a.val to half
137 store half %r.val, ptr addrspace(1) %r
141 ; The f16 = uitofp i64 case is tested in uint_to_fp.i64.ll.
; Vector <2 x i16> -> <2 x half>: loads the vector through %a, converts it
; with uitofp and stores the <2 x half> result through %r.
; Expected lowering per run line:
;   tahiti (SI checks)     - mask/shift the loaded dword into two lanes,
;                            convert each lane via v_cvt_f32_u32 and
;                            v_cvt_f16_f32, then shift + or to repack.
;   fiji (VI checks)       - v_cvt_f16_u16_sdwa on WORD_1 for the high lane,
;                            plain v_cvt_f16_u16 for the low lane, then v_or.
;   gfx1100 (GFX11 checks) - shift out the high lane, two v_cvt_f16_u16,
;                            then v_pack_b32_f16.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
143 define amdgpu_kernel void @uitofp_v2i16_to_v2f16(
144 ; SI-LABEL: uitofp_v2i16_to_v2f16:
145 ; SI: ; %bb.0: ; %entry
146 ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
147 ; SI-NEXT: s_mov_b32 s7, 0xf000
148 ; SI-NEXT: s_mov_b32 s6, -1
149 ; SI-NEXT: s_mov_b32 s10, s6
150 ; SI-NEXT: s_mov_b32 s11, s7
151 ; SI-NEXT: s_waitcnt lgkmcnt(0)
152 ; SI-NEXT: s_mov_b32 s8, s2
153 ; SI-NEXT: s_mov_b32 s9, s3
154 ; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0
155 ; SI-NEXT: s_mov_b32 s4, s0
156 ; SI-NEXT: s_mov_b32 s5, s1
157 ; SI-NEXT: s_waitcnt vmcnt(0)
158 ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v0
159 ; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
160 ; SI-NEXT: v_cvt_f32_u32_e32 v0, v0
161 ; SI-NEXT: v_cvt_f32_u32_e32 v1, v1
162 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
163 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
164 ; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
165 ; SI-NEXT: v_or_b32_e32 v0, v1, v0
166 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
169 ; VI-LABEL: uitofp_v2i16_to_v2f16:
170 ; VI: ; %bb.0: ; %entry
171 ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
172 ; VI-NEXT: s_mov_b32 s7, 0xf000
173 ; VI-NEXT: s_mov_b32 s6, -1
174 ; VI-NEXT: s_mov_b32 s10, s6
175 ; VI-NEXT: s_mov_b32 s11, s7
176 ; VI-NEXT: s_waitcnt lgkmcnt(0)
177 ; VI-NEXT: s_mov_b32 s8, s2
178 ; VI-NEXT: s_mov_b32 s9, s3
179 ; VI-NEXT: buffer_load_dword v0, off, s[8:11], 0
180 ; VI-NEXT: s_mov_b32 s4, s0
181 ; VI-NEXT: s_mov_b32 s5, s1
182 ; VI-NEXT: s_waitcnt vmcnt(0)
183 ; VI-NEXT: v_cvt_f16_u16_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
184 ; VI-NEXT: v_cvt_f16_u16_e32 v0, v0
185 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
186 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
189 ; GFX11-LABEL: uitofp_v2i16_to_v2f16:
190 ; GFX11: ; %bb.0: ; %entry
191 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
192 ; GFX11-NEXT: s_mov_b32 s6, -1
193 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
194 ; GFX11-NEXT: s_mov_b32 s10, s6
195 ; GFX11-NEXT: s_mov_b32 s11, s7
196 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
197 ; GFX11-NEXT: s_mov_b32 s8, s2
198 ; GFX11-NEXT: s_mov_b32 s9, s3
199 ; GFX11-NEXT: s_mov_b32 s4, s0
200 ; GFX11-NEXT: buffer_load_b32 v0, off, s[8:11], 0
201 ; GFX11-NEXT: s_mov_b32 s5, s1
202 ; GFX11-NEXT: s_waitcnt vmcnt(0)
203 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
204 ; GFX11-NEXT: v_cvt_f16_u16_e32 v0, v0
205 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
206 ; GFX11-NEXT: v_cvt_f16_u16_e32 v1, v1
207 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
208 ; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
209 ; GFX11-NEXT: s_nop 0
210 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
211 ; GFX11-NEXT: s_endpgm
213 ptr addrspace(1) %a) {
215 %a.val = load <2 x i16>, ptr addrspace(1) %a
216 %r.val = uitofp <2 x i16> %a.val to <2 x half>
217 store <2 x half> %r.val, ptr addrspace(1) %r
; Vector <2 x i32> -> <2 x half>: loads the vector through %a, converts it
; with uitofp and stores the <2 x half> result through %r.
; Every run line expects each lane to go through f32 (v_cvt_f32_u32 followed
; by a v_cvt_f16_f32 form); they differ only in how the two halves are packed:
;   tahiti (SI checks)     - v_lshlrev by 16 on the high lane, then v_or.
;   fiji (VI checks)       - v_cvt_f16_f32_sdwa writes the high lane straight
;                            into WORD_1, then v_or.
;   gfx1100 (GFX11 checks) - v_pack_b32_f16.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
221 define amdgpu_kernel void @uitofp_v2i32_to_v2f16(
222 ; SI-LABEL: uitofp_v2i32_to_v2f16:
223 ; SI: ; %bb.0: ; %entry
224 ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
225 ; SI-NEXT: s_mov_b32 s7, 0xf000
226 ; SI-NEXT: s_mov_b32 s6, -1
227 ; SI-NEXT: s_mov_b32 s10, s6
228 ; SI-NEXT: s_mov_b32 s11, s7
229 ; SI-NEXT: s_waitcnt lgkmcnt(0)
230 ; SI-NEXT: s_mov_b32 s8, s2
231 ; SI-NEXT: s_mov_b32 s9, s3
232 ; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
233 ; SI-NEXT: s_mov_b32 s4, s0
234 ; SI-NEXT: s_mov_b32 s5, s1
235 ; SI-NEXT: s_waitcnt vmcnt(0)
236 ; SI-NEXT: v_cvt_f32_u32_e32 v1, v1
237 ; SI-NEXT: v_cvt_f32_u32_e32 v0, v0
238 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
239 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
240 ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
241 ; SI-NEXT: v_or_b32_e32 v0, v0, v1
242 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
245 ; VI-LABEL: uitofp_v2i32_to_v2f16:
246 ; VI: ; %bb.0: ; %entry
247 ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
248 ; VI-NEXT: s_mov_b32 s7, 0xf000
249 ; VI-NEXT: s_mov_b32 s6, -1
250 ; VI-NEXT: s_mov_b32 s10, s6
251 ; VI-NEXT: s_mov_b32 s11, s7
252 ; VI-NEXT: s_waitcnt lgkmcnt(0)
253 ; VI-NEXT: s_mov_b32 s8, s2
254 ; VI-NEXT: s_mov_b32 s9, s3
255 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
256 ; VI-NEXT: s_mov_b32 s4, s0
257 ; VI-NEXT: s_mov_b32 s5, s1
258 ; VI-NEXT: s_waitcnt vmcnt(0)
259 ; VI-NEXT: v_cvt_f32_u32_e32 v1, v1
260 ; VI-NEXT: v_cvt_f32_u32_e32 v0, v0
261 ; VI-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
262 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
263 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
264 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
267 ; GFX11-LABEL: uitofp_v2i32_to_v2f16:
268 ; GFX11: ; %bb.0: ; %entry
269 ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
270 ; GFX11-NEXT: s_mov_b32 s6, -1
271 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
272 ; GFX11-NEXT: s_mov_b32 s10, s6
273 ; GFX11-NEXT: s_mov_b32 s11, s7
274 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
275 ; GFX11-NEXT: s_mov_b32 s8, s2
276 ; GFX11-NEXT: s_mov_b32 s9, s3
277 ; GFX11-NEXT: s_mov_b32 s4, s0
278 ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
279 ; GFX11-NEXT: s_mov_b32 s5, s1
280 ; GFX11-NEXT: s_waitcnt vmcnt(0)
281 ; GFX11-NEXT: v_cvt_f32_u32_e32 v1, v1
282 ; GFX11-NEXT: v_cvt_f32_u32_e32 v0, v0
283 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
284 ; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
285 ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
286 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
287 ; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
288 ; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
289 ; GFX11-NEXT: s_nop 0
290 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
291 ; GFX11-NEXT: s_endpgm
293 ptr addrspace(1) %a) {
295 %a.val = load <2 x i32>, ptr addrspace(1) %a
296 %r.val = uitofp <2 x i32> %a.val to <2 x half>
297 store <2 x half> %r.val, ptr addrspace(1) %r
; i1 -> half: loads one float from %in0 and one from %in1, forms
; (%a oge 0.0) xor (%b oge 1.0), converts that i1 with uitofp and stores the
; half result to %out.
; All three run lines (tahiti, fiji, gfx1100) expect the same shape: two
; v_cmp_le_f32 compares, a scalar xor of the two condition masks, a
; v_cndmask that selects between 0 and 1.0, and a final v_cvt_f16_f32.
; The gfx1100 checks use 32-bit condition registers (vcc_lo, s_xor_b32);
; the other two use 64-bit ones (vcc, s_xor_b64).
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
301 define amdgpu_kernel void @s_uint_to_fp_i1_to_f16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
302 ; SI-LABEL: s_uint_to_fp_i1_to_f16:
304 ; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
305 ; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xd
306 ; SI-NEXT: s_mov_b32 s11, 0xf000
307 ; SI-NEXT: s_mov_b32 s10, -1
308 ; SI-NEXT: s_mov_b32 s2, s10
309 ; SI-NEXT: s_mov_b32 s3, s11
310 ; SI-NEXT: s_waitcnt lgkmcnt(0)
311 ; SI-NEXT: s_mov_b32 s12, s6
312 ; SI-NEXT: s_mov_b32 s13, s7
313 ; SI-NEXT: s_mov_b32 s14, s10
314 ; SI-NEXT: s_mov_b32 s15, s11
315 ; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0
316 ; SI-NEXT: buffer_load_dword v1, off, s[12:15], 0
317 ; SI-NEXT: s_mov_b32 s8, s4
318 ; SI-NEXT: s_mov_b32 s9, s5
319 ; SI-NEXT: s_waitcnt vmcnt(1)
320 ; SI-NEXT: v_cmp_le_f32_e32 vcc, 1.0, v0
321 ; SI-NEXT: s_waitcnt vmcnt(0)
322 ; SI-NEXT: v_cmp_le_f32_e64 s[0:1], 0, v1
323 ; SI-NEXT: s_xor_b64 s[0:1], s[0:1], vcc
324 ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]
325 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
326 ; SI-NEXT: buffer_store_short v0, off, s[8:11], 0
329 ; VI-LABEL: s_uint_to_fp_i1_to_f16:
331 ; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
332 ; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
333 ; VI-NEXT: s_mov_b32 s11, 0xf000
334 ; VI-NEXT: s_mov_b32 s10, -1
335 ; VI-NEXT: s_mov_b32 s2, s10
336 ; VI-NEXT: s_mov_b32 s3, s11
337 ; VI-NEXT: s_waitcnt lgkmcnt(0)
338 ; VI-NEXT: s_mov_b32 s12, s6
339 ; VI-NEXT: s_mov_b32 s13, s7
340 ; VI-NEXT: s_mov_b32 s14, s10
341 ; VI-NEXT: s_mov_b32 s15, s11
342 ; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0
343 ; VI-NEXT: buffer_load_dword v1, off, s[12:15], 0
344 ; VI-NEXT: s_mov_b32 s8, s4
345 ; VI-NEXT: s_mov_b32 s9, s5
346 ; VI-NEXT: s_waitcnt vmcnt(1)
347 ; VI-NEXT: v_cmp_le_f32_e32 vcc, 1.0, v0
348 ; VI-NEXT: s_waitcnt vmcnt(0)
349 ; VI-NEXT: v_cmp_le_f32_e64 s[0:1], 0, v1
350 ; VI-NEXT: s_xor_b64 s[0:1], s[0:1], vcc
351 ; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]
352 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v0
353 ; VI-NEXT: buffer_store_short v0, off, s[8:11], 0
356 ; GFX11-LABEL: s_uint_to_fp_i1_to_f16:
358 ; GFX11-NEXT: s_clause 0x1
359 ; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
360 ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
361 ; GFX11-NEXT: s_mov_b32 s10, -1
362 ; GFX11-NEXT: s_mov_b32 s11, 0x31016000
363 ; GFX11-NEXT: s_mov_b32 s2, s10
364 ; GFX11-NEXT: s_mov_b32 s3, s11
365 ; GFX11-NEXT: s_mov_b32 s14, s10
366 ; GFX11-NEXT: s_mov_b32 s15, s11
367 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
368 ; GFX11-NEXT: s_mov_b32 s12, s6
369 ; GFX11-NEXT: s_mov_b32 s13, s7
370 ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0
371 ; GFX11-NEXT: buffer_load_b32 v1, off, s[12:15], 0
372 ; GFX11-NEXT: s_mov_b32 s8, s4
373 ; GFX11-NEXT: s_mov_b32 s9, s5
374 ; GFX11-NEXT: s_waitcnt vmcnt(1)
375 ; GFX11-NEXT: v_cmp_le_f32_e32 vcc_lo, 1.0, v0
376 ; GFX11-NEXT: s_waitcnt vmcnt(0)
377 ; GFX11-NEXT: v_cmp_le_f32_e64 s0, 0, v1
378 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
379 ; GFX11-NEXT: s_xor_b32 s0, s0, vcc_lo
380 ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0
381 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
382 ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
383 ; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
384 ; GFX11-NEXT: s_nop 0
385 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
386 ; GFX11-NEXT: s_endpgm
387 %a = load float, ptr addrspace(1) %in0
388 %b = load float, ptr addrspace(1) %in1
389 %acmp = fcmp oge float %a, 0.000000e+00
390 %bcmp = fcmp oge float %b, 1.000000e+00
391 %result = xor i1 %acmp, %bcmp
392 %fp = uitofp i1 %result to half
393 store half %fp, ptr addrspace(1) %out
397 ; The f16 = uitofp i64 case is tested in uint_to_fp.i64.ll.