1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -global-isel=0 -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=SI-SDAG %s
3 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -global-isel=1 -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI-GISEL %s
4 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=VI-SDAG %s
5 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s
6 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SDAG %s
7 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-GISEL %s
8 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG %s
9 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL %s
; Scalar float -> half truncation in an amdgpu_kernel: loads a float from %a,
; applies fptrunc to half, and stores the result to %r.
; The autogenerated assertions below expect a single v_cvt_f16_f32 for every
; check prefix. The SelectionDAG runs feed it from a VGPR filled by a buffer
; load; the GlobalISel runs feed it from an SGPR filled by a scalar load.
; NOTE(review): %r is used by the store but no declaration of it is visible in
; the signature below -- confirm the parameter list.
11 define amdgpu_kernel void @fptrunc_f32_to_f16(
12 ; SI-SDAG-LABEL: fptrunc_f32_to_f16:
13 ; SI-SDAG: ; %bb.0: ; %entry
14 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
15 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
16 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
17 ; SI-SDAG-NEXT: s_mov_b32 s10, s6
18 ; SI-SDAG-NEXT: s_mov_b32 s11, s7
19 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
20 ; SI-SDAG-NEXT: s_mov_b32 s8, s2
21 ; SI-SDAG-NEXT: s_mov_b32 s9, s3
22 ; SI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
23 ; SI-SDAG-NEXT: s_mov_b32 s4, s0
24 ; SI-SDAG-NEXT: s_mov_b32 s5, s1
25 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
26 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
27 ; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
28 ; SI-SDAG-NEXT: s_endpgm
30 ; SI-GISEL-LABEL: fptrunc_f32_to_f16:
31 ; SI-GISEL: ; %bb.0: ; %entry
32 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
33 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
34 ; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0
35 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
36 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
37 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s3
38 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
39 ; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
40 ; SI-GISEL-NEXT: s_endpgm
42 ; VI-SDAG-LABEL: fptrunc_f32_to_f16:
43 ; VI-SDAG: ; %bb.0: ; %entry
44 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
45 ; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
46 ; VI-SDAG-NEXT: s_mov_b32 s6, -1
47 ; VI-SDAG-NEXT: s_mov_b32 s10, s6
48 ; VI-SDAG-NEXT: s_mov_b32 s11, s7
49 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
50 ; VI-SDAG-NEXT: s_mov_b32 s8, s2
51 ; VI-SDAG-NEXT: s_mov_b32 s9, s3
52 ; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
53 ; VI-SDAG-NEXT: s_mov_b32 s4, s0
54 ; VI-SDAG-NEXT: s_mov_b32 s5, s1
55 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
56 ; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
57 ; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
58 ; VI-SDAG-NEXT: s_endpgm
60 ; VI-GISEL-LABEL: fptrunc_f32_to_f16:
61 ; VI-GISEL: ; %bb.0: ; %entry
62 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
63 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
64 ; VI-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
65 ; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
66 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
67 ; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
68 ; VI-GISEL-NEXT: s_mov_b32 s2, -1
69 ; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
70 ; VI-GISEL-NEXT: s_endpgm
72 ; GFX9-SDAG-LABEL: fptrunc_f32_to_f16:
73 ; GFX9-SDAG: ; %bb.0: ; %entry
74 ; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
75 ; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
76 ; GFX9-SDAG-NEXT: s_mov_b32 s6, -1
77 ; GFX9-SDAG-NEXT: s_mov_b32 s10, s6
78 ; GFX9-SDAG-NEXT: s_mov_b32 s11, s7
79 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
80 ; GFX9-SDAG-NEXT: s_mov_b32 s8, s2
81 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s3
82 ; GFX9-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
83 ; GFX9-SDAG-NEXT: s_mov_b32 s4, s0
84 ; GFX9-SDAG-NEXT: s_mov_b32 s5, s1
85 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
86 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
87 ; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
88 ; GFX9-SDAG-NEXT: s_endpgm
90 ; GFX9-GISEL-LABEL: fptrunc_f32_to_f16:
91 ; GFX9-GISEL: ; %bb.0: ; %entry
92 ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
93 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
94 ; GFX9-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
95 ; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
96 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
97 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
98 ; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
99 ; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
100 ; GFX9-GISEL-NEXT: s_endpgm
102 ; GFX11-SDAG-LABEL: fptrunc_f32_to_f16:
103 ; GFX11-SDAG: ; %bb.0: ; %entry
104 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
105 ; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
106 ; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
107 ; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
108 ; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
109 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
110 ; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
111 ; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
112 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
113 ; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0
114 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
115 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
116 ; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
117 ; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
118 ; GFX11-SDAG-NEXT: s_nop 0
119 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
120 ; GFX11-SDAG-NEXT: s_endpgm
122 ; GFX11-GISEL-LABEL: fptrunc_f32_to_f16:
123 ; GFX11-GISEL: ; %bb.0: ; %entry
124 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
125 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
126 ; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0
127 ; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
128 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
129 ; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
130 ; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
131 ; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
132 ; GFX11-GISEL-NEXT: s_nop 0
133 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
134 ; GFX11-GISEL-NEXT: s_endpgm
136 ptr addrspace(1) %a) {
138 %a.val = load float, ptr addrspace(1) %a
139 %r.val = fptrunc float %a.val to half
140 store half %r.val, ptr addrspace(1) %r
; Scalar double -> half truncation in an amdgpu_kernel: loads a double from
; %a, applies fptrunc to half, and stores the result to %r.
; The autogenerated assertions below expect a two-instruction conversion for
; every check prefix: v_cvt_f32_f64 followed by v_cvt_f16_f32. The GFX11 runs
; additionally expect an s_delay_alu between the two conversions.
; NOTE(review): %r is used by the store but no declaration of it is visible in
; the signature below -- confirm the parameter list.
144 define amdgpu_kernel void @fptrunc_f64_to_f16(
145 ; SI-SDAG-LABEL: fptrunc_f64_to_f16:
146 ; SI-SDAG: ; %bb.0: ; %entry
147 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
148 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
149 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
150 ; SI-SDAG-NEXT: s_mov_b32 s10, s6
151 ; SI-SDAG-NEXT: s_mov_b32 s11, s7
152 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
153 ; SI-SDAG-NEXT: s_mov_b32 s8, s2
154 ; SI-SDAG-NEXT: s_mov_b32 s9, s3
155 ; SI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
156 ; SI-SDAG-NEXT: s_mov_b32 s4, s0
157 ; SI-SDAG-NEXT: s_mov_b32 s5, s1
158 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
159 ; SI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
160 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
161 ; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
162 ; SI-SDAG-NEXT: s_endpgm
164 ; SI-GISEL-LABEL: fptrunc_f64_to_f16:
165 ; SI-GISEL: ; %bb.0: ; %entry
166 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
167 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
168 ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
169 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
170 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
171 ; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
172 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
173 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
174 ; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
175 ; SI-GISEL-NEXT: s_endpgm
177 ; VI-SDAG-LABEL: fptrunc_f64_to_f16:
178 ; VI-SDAG: ; %bb.0: ; %entry
179 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
180 ; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
181 ; VI-SDAG-NEXT: s_mov_b32 s6, -1
182 ; VI-SDAG-NEXT: s_mov_b32 s10, s6
183 ; VI-SDAG-NEXT: s_mov_b32 s11, s7
184 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
185 ; VI-SDAG-NEXT: s_mov_b32 s8, s2
186 ; VI-SDAG-NEXT: s_mov_b32 s9, s3
187 ; VI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
188 ; VI-SDAG-NEXT: s_mov_b32 s4, s0
189 ; VI-SDAG-NEXT: s_mov_b32 s5, s1
190 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
191 ; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
192 ; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
193 ; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
194 ; VI-SDAG-NEXT: s_endpgm
196 ; VI-GISEL-LABEL: fptrunc_f64_to_f16:
197 ; VI-GISEL: ; %bb.0: ; %entry
198 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
199 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
200 ; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
201 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
202 ; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
203 ; VI-GISEL-NEXT: s_mov_b32 s2, -1
204 ; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
205 ; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
206 ; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
207 ; VI-GISEL-NEXT: s_endpgm
209 ; GFX9-SDAG-LABEL: fptrunc_f64_to_f16:
210 ; GFX9-SDAG: ; %bb.0: ; %entry
211 ; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
212 ; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
213 ; GFX9-SDAG-NEXT: s_mov_b32 s6, -1
214 ; GFX9-SDAG-NEXT: s_mov_b32 s10, s6
215 ; GFX9-SDAG-NEXT: s_mov_b32 s11, s7
216 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
217 ; GFX9-SDAG-NEXT: s_mov_b32 s8, s2
218 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s3
219 ; GFX9-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
220 ; GFX9-SDAG-NEXT: s_mov_b32 s4, s0
221 ; GFX9-SDAG-NEXT: s_mov_b32 s5, s1
222 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
223 ; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
224 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
225 ; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
226 ; GFX9-SDAG-NEXT: s_endpgm
228 ; GFX9-GISEL-LABEL: fptrunc_f64_to_f16:
229 ; GFX9-GISEL: ; %bb.0: ; %entry
230 ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
231 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
232 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
233 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
234 ; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
235 ; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
236 ; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
237 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
238 ; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
239 ; GFX9-GISEL-NEXT: s_endpgm
241 ; GFX11-SDAG-LABEL: fptrunc_f64_to_f16:
242 ; GFX11-SDAG: ; %bb.0: ; %entry
243 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
244 ; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
245 ; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
246 ; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
247 ; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
248 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
249 ; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
250 ; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
251 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
252 ; GFX11-SDAG-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
253 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
254 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
255 ; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
256 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
257 ; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
258 ; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
259 ; GFX11-SDAG-NEXT: s_nop 0
260 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
261 ; GFX11-SDAG-NEXT: s_endpgm
263 ; GFX11-GISEL-LABEL: fptrunc_f64_to_f16:
264 ; GFX11-GISEL: ; %bb.0: ; %entry
265 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
266 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
267 ; GFX11-GISEL-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
268 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
269 ; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
270 ; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
271 ; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
272 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
273 ; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
274 ; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
275 ; GFX11-GISEL-NEXT: s_nop 0
276 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
277 ; GFX11-GISEL-NEXT: s_endpgm
279 ptr addrspace(1) %a) {
281 %a.val = load double, ptr addrspace(1) %a
282 %r.val = fptrunc double %a.val to half
283 store half %r.val, ptr addrspace(1) %r
; Vector <2 x float> -> <2 x half> truncation in an amdgpu_kernel: loads the
; vector from %a, applies fptrunc, and stores the <2 x half> result to %r.
; The autogenerated assertions below expect one v_cvt_f16_f32 per lane and a
; single dword store. How the two halves are combined differs per target:
;   - SI runs:     v_lshlrev_b32 by 16 on the high lane, then v_or_b32.
;   - VI runs:     the high lane is converted with an SDWA form writing
;                  WORD_1, then v_or_b32.
;   - GFX9/GFX11:  v_pack_b32_f16.
; NOTE(review): %r is used by the store but no declaration of it is visible in
; the signature below -- confirm the parameter list.
287 define amdgpu_kernel void @fptrunc_v2f32_to_v2f16(
288 ; SI-SDAG-LABEL: fptrunc_v2f32_to_v2f16:
289 ; SI-SDAG: ; %bb.0: ; %entry
290 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
291 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
292 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
293 ; SI-SDAG-NEXT: s_mov_b32 s10, s6
294 ; SI-SDAG-NEXT: s_mov_b32 s11, s7
295 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
296 ; SI-SDAG-NEXT: s_mov_b32 s8, s2
297 ; SI-SDAG-NEXT: s_mov_b32 s9, s3
298 ; SI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
299 ; SI-SDAG-NEXT: s_mov_b32 s4, s0
300 ; SI-SDAG-NEXT: s_mov_b32 s5, s1
301 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
302 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
303 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
304 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
305 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
306 ; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
307 ; SI-SDAG-NEXT: s_endpgm
309 ; SI-GISEL-LABEL: fptrunc_v2f32_to_v2f16:
310 ; SI-GISEL: ; %bb.0: ; %entry
311 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
312 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
313 ; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
314 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
315 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
316 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s4
317 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, s5
318 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
319 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
320 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
321 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
322 ; SI-GISEL-NEXT: s_endpgm
324 ; VI-SDAG-LABEL: fptrunc_v2f32_to_v2f16:
325 ; VI-SDAG: ; %bb.0: ; %entry
326 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
327 ; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
328 ; VI-SDAG-NEXT: s_mov_b32 s6, -1
329 ; VI-SDAG-NEXT: s_mov_b32 s10, s6
330 ; VI-SDAG-NEXT: s_mov_b32 s11, s7
331 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
332 ; VI-SDAG-NEXT: s_mov_b32 s8, s2
333 ; VI-SDAG-NEXT: s_mov_b32 s9, s3
334 ; VI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
335 ; VI-SDAG-NEXT: s_mov_b32 s4, s0
336 ; VI-SDAG-NEXT: s_mov_b32 s5, s1
337 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
338 ; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
339 ; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
340 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
341 ; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
342 ; VI-SDAG-NEXT: s_endpgm
344 ; VI-GISEL-LABEL: fptrunc_v2f32_to_v2f16:
345 ; VI-GISEL: ; %bb.0: ; %entry
346 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
347 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
348 ; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
349 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
350 ; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
351 ; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
352 ; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
353 ; VI-GISEL-NEXT: s_mov_b32 s2, -1
354 ; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
355 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
356 ; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
357 ; VI-GISEL-NEXT: s_endpgm
359 ; GFX9-SDAG-LABEL: fptrunc_v2f32_to_v2f16:
360 ; GFX9-SDAG: ; %bb.0: ; %entry
361 ; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
362 ; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
363 ; GFX9-SDAG-NEXT: s_mov_b32 s6, -1
364 ; GFX9-SDAG-NEXT: s_mov_b32 s10, s6
365 ; GFX9-SDAG-NEXT: s_mov_b32 s11, s7
366 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
367 ; GFX9-SDAG-NEXT: s_mov_b32 s8, s2
368 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s3
369 ; GFX9-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
370 ; GFX9-SDAG-NEXT: s_mov_b32 s4, s0
371 ; GFX9-SDAG-NEXT: s_mov_b32 s5, s1
372 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
373 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
374 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
375 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
376 ; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
377 ; GFX9-SDAG-NEXT: s_endpgm
379 ; GFX9-GISEL-LABEL: fptrunc_v2f32_to_v2f16:
380 ; GFX9-GISEL: ; %bb.0: ; %entry
381 ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
382 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
383 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
384 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
385 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
386 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v1, s3
387 ; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
388 ; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
389 ; GFX9-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
390 ; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
391 ; GFX9-GISEL-NEXT: s_endpgm
393 ; GFX11-SDAG-LABEL: fptrunc_v2f32_to_v2f16:
394 ; GFX11-SDAG: ; %bb.0: ; %entry
395 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
396 ; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
397 ; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
398 ; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
399 ; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
400 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
401 ; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
402 ; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
403 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
404 ; GFX11-SDAG-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
405 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
406 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
407 ; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
408 ; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
409 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
410 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
411 ; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
412 ; GFX11-SDAG-NEXT: s_nop 0
413 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
414 ; GFX11-SDAG-NEXT: s_endpgm
416 ; GFX11-GISEL-LABEL: fptrunc_v2f32_to_v2f16:
417 ; GFX11-GISEL: ; %bb.0: ; %entry
418 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
419 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
420 ; GFX11-GISEL-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
421 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
422 ; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
423 ; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v1, s3
424 ; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
425 ; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
426 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
427 ; GFX11-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
428 ; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
429 ; GFX11-GISEL-NEXT: s_nop 0
430 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
431 ; GFX11-GISEL-NEXT: s_endpgm
433 ptr addrspace(1) %a) {
435 %a.val = load <2 x float>, ptr addrspace(1) %a
436 %r.val = fptrunc <2 x float> %a.val to <2 x half>
437 store <2 x half> %r.val, ptr addrspace(1) %r
; Vector <2 x double> -> <2 x half> truncation in an amdgpu_kernel: loads the
; vector from %a, applies fptrunc, and stores the <2 x half> result to %r.
; The autogenerated assertions below expect each lane to go through
; v_cvt_f32_f64 and then v_cvt_f16_f32, followed by a single dword store.
; How the two halves are combined differs per run:
;   - SI runs:          v_lshlrev_b32 by 16 on the high lane, then v_or_b32.
;   - VI runs:          SDWA conversion writing WORD_1, then v_or_b32.
;   - GFX9 SDAG:        v_lshl_or_b32.
;   - GFX11 SDAG:       v_and_b32 with 0xffff on the low lane, then
;                       v_lshl_or_b32.
;   - GFX9/GFX11 GISel: v_pack_b32_f16.
; NOTE(review): %r is used by the store but no declaration of it is visible in
; the signature below -- confirm the parameter list.
441 define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
442 ; SI-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
443 ; SI-SDAG: ; %bb.0: ; %entry
444 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
445 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
446 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
447 ; SI-SDAG-NEXT: s_mov_b32 s10, s6
448 ; SI-SDAG-NEXT: s_mov_b32 s11, s7
449 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
450 ; SI-SDAG-NEXT: s_mov_b32 s8, s2
451 ; SI-SDAG-NEXT: s_mov_b32 s9, s3
452 ; SI-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
453 ; SI-SDAG-NEXT: s_mov_b32 s4, s0
454 ; SI-SDAG-NEXT: s_mov_b32 s5, s1
455 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
456 ; SI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
457 ; SI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
458 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
459 ; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
460 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
461 ; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
462 ; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
463 ; SI-SDAG-NEXT: s_endpgm
465 ; SI-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
466 ; SI-GISEL: ; %bb.0: ; %entry
467 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
468 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
469 ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
470 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
471 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
472 ; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
473 ; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
474 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
475 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
476 ; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
477 ; SI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
478 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
479 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
480 ; SI-GISEL-NEXT: s_endpgm
482 ; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
483 ; VI-SDAG: ; %bb.0: ; %entry
484 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
485 ; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
486 ; VI-SDAG-NEXT: s_mov_b32 s6, -1
487 ; VI-SDAG-NEXT: s_mov_b32 s10, s6
488 ; VI-SDAG-NEXT: s_mov_b32 s11, s7
489 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
490 ; VI-SDAG-NEXT: s_mov_b32 s8, s2
491 ; VI-SDAG-NEXT: s_mov_b32 s9, s3
492 ; VI-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
493 ; VI-SDAG-NEXT: s_mov_b32 s4, s0
494 ; VI-SDAG-NEXT: s_mov_b32 s5, s1
495 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
496 ; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
497 ; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
498 ; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
499 ; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
500 ; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
501 ; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
502 ; VI-SDAG-NEXT: s_endpgm
504 ; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
505 ; VI-GISEL: ; %bb.0: ; %entry
506 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
507 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
508 ; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
509 ; VI-GISEL-NEXT: s_mov_b32 s2, -1
510 ; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
511 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
512 ; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
513 ; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
514 ; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
515 ; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
516 ; VI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
517 ; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
518 ; VI-GISEL-NEXT: s_endpgm
520 ; GFX9-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
521 ; GFX9-SDAG: ; %bb.0: ; %entry
522 ; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
523 ; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
524 ; GFX9-SDAG-NEXT: s_mov_b32 s6, -1
525 ; GFX9-SDAG-NEXT: s_mov_b32 s10, s6
526 ; GFX9-SDAG-NEXT: s_mov_b32 s11, s7
527 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
528 ; GFX9-SDAG-NEXT: s_mov_b32 s8, s2
529 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s3
530 ; GFX9-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
531 ; GFX9-SDAG-NEXT: s_mov_b32 s4, s0
532 ; GFX9-SDAG-NEXT: s_mov_b32 s5, s1
533 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
534 ; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
535 ; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
536 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
537 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
538 ; GFX9-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0
539 ; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
540 ; GFX9-SDAG-NEXT: s_endpgm
542 ; GFX9-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
543 ; GFX9-GISEL: ; %bb.0: ; %entry
544 ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
545 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
546 ; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
547 ; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
548 ; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
549 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
550 ; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
551 ; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
552 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
553 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
554 ; GFX9-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
555 ; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
556 ; GFX9-GISEL-NEXT: s_endpgm
558 ; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
559 ; GFX11-SDAG: ; %bb.0: ; %entry
560 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
561 ; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
562 ; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
563 ; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
564 ; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
565 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
566 ; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
567 ; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
568 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
569 ; GFX11-SDAG-NEXT: buffer_load_b128 v[0:3], off, s[8:11], 0
570 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
571 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
572 ; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
573 ; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, v[2:3]
574 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
575 ; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
576 ; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
577 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
578 ; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
579 ; GFX11-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0
580 ; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
581 ; GFX11-SDAG-NEXT: s_nop 0
582 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
583 ; GFX11-SDAG-NEXT: s_endpgm
585 ; GFX11-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
586 ; GFX11-GISEL: ; %bb.0: ; %entry
587 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
588 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
589 ; GFX11-GISEL-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
590 ; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
591 ; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
592 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
593 ; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
594 ; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
595 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
596 ; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
597 ; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
598 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
599 ; GFX11-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
600 ; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
601 ; GFX11-GISEL-NEXT: s_nop 0
602 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
603 ; GFX11-GISEL-NEXT: s_endpgm
605 ptr addrspace(1) %a) {
607 %a.val = load <2 x double>, ptr addrspace(1) %a
608 %r.val = fptrunc <2 x double> %a.val to <2 x half>
609 store <2 x half> %r.val, ptr addrspace(1) %r
; fneg feeding a float -> half truncation in an amdgpu_kernel: loads a float
; from %a, negates it with fneg, applies fptrunc to half, and stores the
; result to %r.
; The autogenerated assertions below expect no separate negate instruction for
; any check prefix: the negation appears as a source modifier on the VOP3 form
; of the conversion (v_cvt_f16_f32_e64 with a `-v0` or `-sN` operand).
; NOTE(review): %r is used by the store but no declaration of it is visible in
; the signature below -- confirm the parameter list.
613 define amdgpu_kernel void @fneg_fptrunc_f32_to_f16(
614 ; SI-SDAG-LABEL: fneg_fptrunc_f32_to_f16:
615 ; SI-SDAG: ; %bb.0: ; %entry
616 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
617 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
618 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
619 ; SI-SDAG-NEXT: s_mov_b32 s10, s6
620 ; SI-SDAG-NEXT: s_mov_b32 s11, s7
621 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
622 ; SI-SDAG-NEXT: s_mov_b32 s8, s2
623 ; SI-SDAG-NEXT: s_mov_b32 s9, s3
624 ; SI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
625 ; SI-SDAG-NEXT: s_mov_b32 s4, s0
626 ; SI-SDAG-NEXT: s_mov_b32 s5, s1
627 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
628 ; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
629 ; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
630 ; SI-SDAG-NEXT: s_endpgm
632 ; SI-GISEL-LABEL: fneg_fptrunc_f32_to_f16:
633 ; SI-GISEL: ; %bb.0: ; %entry
634 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
635 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
636 ; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0
637 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
638 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
639 ; SI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -s3
640 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
641 ; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
642 ; SI-GISEL-NEXT: s_endpgm
644 ; VI-SDAG-LABEL: fneg_fptrunc_f32_to_f16:
645 ; VI-SDAG: ; %bb.0: ; %entry
646 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
647 ; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
648 ; VI-SDAG-NEXT: s_mov_b32 s6, -1
649 ; VI-SDAG-NEXT: s_mov_b32 s10, s6
650 ; VI-SDAG-NEXT: s_mov_b32 s11, s7
651 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
652 ; VI-SDAG-NEXT: s_mov_b32 s8, s2
653 ; VI-SDAG-NEXT: s_mov_b32 s9, s3
654 ; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
655 ; VI-SDAG-NEXT: s_mov_b32 s4, s0
656 ; VI-SDAG-NEXT: s_mov_b32 s5, s1
657 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
658 ; VI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
659 ; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
660 ; VI-SDAG-NEXT: s_endpgm
662 ; VI-GISEL-LABEL: fneg_fptrunc_f32_to_f16:
663 ; VI-GISEL: ; %bb.0: ; %entry
664 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
665 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
666 ; VI-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
667 ; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
668 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
669 ; VI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -s2
670 ; VI-GISEL-NEXT: s_mov_b32 s2, -1
671 ; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
672 ; VI-GISEL-NEXT: s_endpgm
674 ; GFX9-SDAG-LABEL: fneg_fptrunc_f32_to_f16:
675 ; GFX9-SDAG: ; %bb.0: ; %entry
676 ; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
677 ; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
678 ; GFX9-SDAG-NEXT: s_mov_b32 s6, -1
679 ; GFX9-SDAG-NEXT: s_mov_b32 s10, s6
680 ; GFX9-SDAG-NEXT: s_mov_b32 s11, s7
681 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
682 ; GFX9-SDAG-NEXT: s_mov_b32 s8, s2
683 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s3
684 ; GFX9-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
685 ; GFX9-SDAG-NEXT: s_mov_b32 s4, s0
686 ; GFX9-SDAG-NEXT: s_mov_b32 s5, s1
687 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
688 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
689 ; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
690 ; GFX9-SDAG-NEXT: s_endpgm
692 ; GFX9-GISEL-LABEL: fneg_fptrunc_f32_to_f16:
693 ; GFX9-GISEL: ; %bb.0: ; %entry
694 ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
695 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
696 ; GFX9-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
697 ; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
698 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
699 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -s2
700 ; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
701 ; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
702 ; GFX9-GISEL-NEXT: s_endpgm
704 ; GFX11-SDAG-LABEL: fneg_fptrunc_f32_to_f16:
705 ; GFX11-SDAG: ; %bb.0: ; %entry
706 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
707 ; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
708 ; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
709 ; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
710 ; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
711 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
712 ; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
713 ; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
714 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
715 ; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0
716 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
717 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
718 ; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0
719 ; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
720 ; GFX11-SDAG-NEXT: s_nop 0
721 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
722 ; GFX11-SDAG-NEXT: s_endpgm
724 ; GFX11-GISEL-LABEL: fneg_fptrunc_f32_to_f16:
725 ; GFX11-GISEL: ; %bb.0: ; %entry
726 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
727 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
728 ; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0
729 ; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
730 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
731 ; GFX11-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -s2
732 ; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
733 ; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
734 ; GFX11-GISEL-NEXT: s_nop 0
735 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
736 ; GFX11-GISEL-NEXT: s_endpgm
738 ptr addrspace(1) %a) {
740 %a.val = load float, ptr addrspace(1) %a
741 %a.fneg = fneg float %a.val
742 %r.val = fptrunc float %a.fneg to half
743 store half %r.val, ptr addrspace(1) %r
; fabs_fptrunc_f32_to_f16: loads a float through %a, applies llvm.fabs.f32,
; truncates the result to half and stores it through %r.
; The assertions for every run configuration expect the absolute value to
; appear as a |src| operand modifier on v_cvt_f16_f32_e64, with no separate
; instruction computing the fabs.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
747 define amdgpu_kernel void @fabs_fptrunc_f32_to_f16(
748 ; SI-SDAG-LABEL: fabs_fptrunc_f32_to_f16:
749 ; SI-SDAG: ; %bb.0: ; %entry
750 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
751 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
752 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
753 ; SI-SDAG-NEXT: s_mov_b32 s10, s6
754 ; SI-SDAG-NEXT: s_mov_b32 s11, s7
755 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
756 ; SI-SDAG-NEXT: s_mov_b32 s8, s2
757 ; SI-SDAG-NEXT: s_mov_b32 s9, s3
758 ; SI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
759 ; SI-SDAG-NEXT: s_mov_b32 s4, s0
760 ; SI-SDAG-NEXT: s_mov_b32 s5, s1
761 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
762 ; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0|
763 ; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
764 ; SI-SDAG-NEXT: s_endpgm
766 ; SI-GISEL-LABEL: fabs_fptrunc_f32_to_f16:
767 ; SI-GISEL: ; %bb.0: ; %entry
768 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
769 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
770 ; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0
771 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
772 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
773 ; SI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s3|
774 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
775 ; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
776 ; SI-GISEL-NEXT: s_endpgm
778 ; VI-SDAG-LABEL: fabs_fptrunc_f32_to_f16:
779 ; VI-SDAG: ; %bb.0: ; %entry
780 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
781 ; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
782 ; VI-SDAG-NEXT: s_mov_b32 s6, -1
783 ; VI-SDAG-NEXT: s_mov_b32 s10, s6
784 ; VI-SDAG-NEXT: s_mov_b32 s11, s7
785 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
786 ; VI-SDAG-NEXT: s_mov_b32 s8, s2
787 ; VI-SDAG-NEXT: s_mov_b32 s9, s3
788 ; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
789 ; VI-SDAG-NEXT: s_mov_b32 s4, s0
790 ; VI-SDAG-NEXT: s_mov_b32 s5, s1
791 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
792 ; VI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0|
793 ; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
794 ; VI-SDAG-NEXT: s_endpgm
796 ; VI-GISEL-LABEL: fabs_fptrunc_f32_to_f16:
797 ; VI-GISEL: ; %bb.0: ; %entry
798 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
799 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
800 ; VI-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
801 ; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
802 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
803 ; VI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2|
804 ; VI-GISEL-NEXT: s_mov_b32 s2, -1
805 ; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
806 ; VI-GISEL-NEXT: s_endpgm
808 ; GFX9-SDAG-LABEL: fabs_fptrunc_f32_to_f16:
809 ; GFX9-SDAG: ; %bb.0: ; %entry
810 ; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
811 ; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
812 ; GFX9-SDAG-NEXT: s_mov_b32 s6, -1
813 ; GFX9-SDAG-NEXT: s_mov_b32 s10, s6
814 ; GFX9-SDAG-NEXT: s_mov_b32 s11, s7
815 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
816 ; GFX9-SDAG-NEXT: s_mov_b32 s8, s2
817 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s3
818 ; GFX9-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
819 ; GFX9-SDAG-NEXT: s_mov_b32 s4, s0
820 ; GFX9-SDAG-NEXT: s_mov_b32 s5, s1
821 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
822 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0|
823 ; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
824 ; GFX9-SDAG-NEXT: s_endpgm
826 ; GFX9-GISEL-LABEL: fabs_fptrunc_f32_to_f16:
827 ; GFX9-GISEL: ; %bb.0: ; %entry
828 ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
829 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
830 ; GFX9-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
831 ; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
832 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
833 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2|
834 ; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
835 ; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
836 ; GFX9-GISEL-NEXT: s_endpgm
838 ; GFX11-SDAG-LABEL: fabs_fptrunc_f32_to_f16:
839 ; GFX11-SDAG: ; %bb.0: ; %entry
840 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
841 ; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
842 ; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
843 ; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
844 ; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
845 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
846 ; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
847 ; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
848 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
849 ; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0
850 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
851 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
852 ; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0|
853 ; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
854 ; GFX11-SDAG-NEXT: s_nop 0
855 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
856 ; GFX11-SDAG-NEXT: s_endpgm
858 ; GFX11-GISEL-LABEL: fabs_fptrunc_f32_to_f16:
859 ; GFX11-GISEL: ; %bb.0: ; %entry
860 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
861 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
862 ; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0
863 ; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
864 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
865 ; GFX11-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2|
866 ; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
867 ; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
868 ; GFX11-GISEL-NEXT: s_nop 0
869 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
870 ; GFX11-GISEL-NEXT: s_endpgm
872 ptr addrspace(1) %a) {
874 %a.val = load float, ptr addrspace(1) %a
875 %a.fabs = call float @llvm.fabs.f32(float %a.val)
876 %r.val = fptrunc float %a.fabs to half
877 store half %r.val, ptr addrspace(1) %r
; fneg_fabs_fptrunc_f32_to_f16: loads a float through %a, computes
; fneg(fabs(x)), truncates the result to half and stores it through %r.
; The assertions for every run configuration expect both operations to appear
; as a single -|src| operand modifier on v_cvt_f16_f32_e64, with no separate
; instruction computing the fabs or the fneg.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
881 define amdgpu_kernel void @fneg_fabs_fptrunc_f32_to_f16(
882 ; SI-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16:
883 ; SI-SDAG: ; %bb.0: ; %entry
884 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
885 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
886 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
887 ; SI-SDAG-NEXT: s_mov_b32 s10, s6
888 ; SI-SDAG-NEXT: s_mov_b32 s11, s7
889 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
890 ; SI-SDAG-NEXT: s_mov_b32 s8, s2
891 ; SI-SDAG-NEXT: s_mov_b32 s9, s3
892 ; SI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
893 ; SI-SDAG-NEXT: s_mov_b32 s4, s0
894 ; SI-SDAG-NEXT: s_mov_b32 s5, s1
895 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
896 ; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -|v0|
897 ; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
898 ; SI-SDAG-NEXT: s_endpgm
900 ; SI-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16:
901 ; SI-GISEL: ; %bb.0: ; %entry
902 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
903 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
904 ; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0
905 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
906 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
907 ; SI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -|s3|
908 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
909 ; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
910 ; SI-GISEL-NEXT: s_endpgm
912 ; VI-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16:
913 ; VI-SDAG: ; %bb.0: ; %entry
914 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
915 ; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
916 ; VI-SDAG-NEXT: s_mov_b32 s6, -1
917 ; VI-SDAG-NEXT: s_mov_b32 s10, s6
918 ; VI-SDAG-NEXT: s_mov_b32 s11, s7
919 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
920 ; VI-SDAG-NEXT: s_mov_b32 s8, s2
921 ; VI-SDAG-NEXT: s_mov_b32 s9, s3
922 ; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
923 ; VI-SDAG-NEXT: s_mov_b32 s4, s0
924 ; VI-SDAG-NEXT: s_mov_b32 s5, s1
925 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
926 ; VI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -|v0|
927 ; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
928 ; VI-SDAG-NEXT: s_endpgm
930 ; VI-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16:
931 ; VI-GISEL: ; %bb.0: ; %entry
932 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
933 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
934 ; VI-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
935 ; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
936 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
937 ; VI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -|s2|
938 ; VI-GISEL-NEXT: s_mov_b32 s2, -1
939 ; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
940 ; VI-GISEL-NEXT: s_endpgm
942 ; GFX9-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16:
943 ; GFX9-SDAG: ; %bb.0: ; %entry
944 ; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
945 ; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
946 ; GFX9-SDAG-NEXT: s_mov_b32 s6, -1
947 ; GFX9-SDAG-NEXT: s_mov_b32 s10, s6
948 ; GFX9-SDAG-NEXT: s_mov_b32 s11, s7
949 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
950 ; GFX9-SDAG-NEXT: s_mov_b32 s8, s2
951 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s3
952 ; GFX9-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
953 ; GFX9-SDAG-NEXT: s_mov_b32 s4, s0
954 ; GFX9-SDAG-NEXT: s_mov_b32 s5, s1
955 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
956 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -|v0|
957 ; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
958 ; GFX9-SDAG-NEXT: s_endpgm
960 ; GFX9-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16:
961 ; GFX9-GISEL: ; %bb.0: ; %entry
962 ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
963 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
964 ; GFX9-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
965 ; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
966 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
967 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -|s2|
968 ; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
969 ; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
970 ; GFX9-GISEL-NEXT: s_endpgm
972 ; GFX11-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16:
973 ; GFX11-SDAG: ; %bb.0: ; %entry
974 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
975 ; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
976 ; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
977 ; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
978 ; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
979 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
980 ; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
981 ; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
982 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
983 ; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0
984 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
985 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
986 ; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -|v0|
987 ; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
988 ; GFX11-SDAG-NEXT: s_nop 0
989 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
990 ; GFX11-SDAG-NEXT: s_endpgm
992 ; GFX11-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16:
993 ; GFX11-GISEL: ; %bb.0: ; %entry
994 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
995 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
996 ; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0
997 ; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
998 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
999 ; GFX11-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -|s2|
1000 ; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
1001 ; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
1002 ; GFX11-GISEL-NEXT: s_nop 0
1003 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1004 ; GFX11-GISEL-NEXT: s_endpgm
1005 ptr addrspace(1) %r,
1006 ptr addrspace(1) %a) #0 {
1008 %a.val = load float, ptr addrspace(1) %a
1009 %a.fabs = call float @llvm.fabs.f32(float %a.val)
1010 %a.fneg.fabs = fneg float %a.fabs
1011 %r.val = fptrunc float %a.fneg.fabs to half
1012 store half %r.val, ptr addrspace(1) %r
; fptrunc_f32_to_f16_zext_i32: loads a float through %a, truncates it to half,
; bitcasts the half to i16, zero-extends that to i32 and stores the i32
; through %r.
; In the assertions for the default, fiji and gfx900 runs the v_cvt_f16_f32
; result is stored as a full dword with no masking instruction in between.
; The gfx1100 assertions additionally expect `v_and_b32 v0, 0xffff, v0`
; before the store.
; NOTE(review): presumably the conversion already leaves the upper 16 bits
; zero on the older targets, which would make the zext free there - confirm
; against the ISA documentation.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
1016 define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
1017 ; SI-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32:
1018 ; SI-SDAG: ; %bb.0: ; %entry
1019 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
1020 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
1021 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
1022 ; SI-SDAG-NEXT: s_mov_b32 s10, s6
1023 ; SI-SDAG-NEXT: s_mov_b32 s11, s7
1024 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1025 ; SI-SDAG-NEXT: s_mov_b32 s8, s2
1026 ; SI-SDAG-NEXT: s_mov_b32 s9, s3
1027 ; SI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
1028 ; SI-SDAG-NEXT: s_mov_b32 s4, s0
1029 ; SI-SDAG-NEXT: s_mov_b32 s5, s1
1030 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1031 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1032 ; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
1033 ; SI-SDAG-NEXT: s_endpgm
1035 ; SI-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32:
1036 ; SI-GISEL: ; %bb.0: ; %entry
1037 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
1038 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1039 ; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0
1040 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
1041 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1042 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s3
1043 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
1044 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
1045 ; SI-GISEL-NEXT: s_endpgm
1047 ; VI-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32:
1048 ; VI-SDAG: ; %bb.0: ; %entry
1049 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1050 ; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
1051 ; VI-SDAG-NEXT: s_mov_b32 s6, -1
1052 ; VI-SDAG-NEXT: s_mov_b32 s10, s6
1053 ; VI-SDAG-NEXT: s_mov_b32 s11, s7
1054 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1055 ; VI-SDAG-NEXT: s_mov_b32 s8, s2
1056 ; VI-SDAG-NEXT: s_mov_b32 s9, s3
1057 ; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
1058 ; VI-SDAG-NEXT: s_mov_b32 s4, s0
1059 ; VI-SDAG-NEXT: s_mov_b32 s5, s1
1060 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1061 ; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1062 ; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
1063 ; VI-SDAG-NEXT: s_endpgm
1065 ; VI-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32:
1066 ; VI-GISEL: ; %bb.0: ; %entry
1067 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1068 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1069 ; VI-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
1070 ; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
1071 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1072 ; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
1073 ; VI-GISEL-NEXT: s_mov_b32 s2, -1
1074 ; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
1075 ; VI-GISEL-NEXT: s_endpgm
1077 ; GFX9-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32:
1078 ; GFX9-SDAG: ; %bb.0: ; %entry
1079 ; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1080 ; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
1081 ; GFX9-SDAG-NEXT: s_mov_b32 s6, -1
1082 ; GFX9-SDAG-NEXT: s_mov_b32 s10, s6
1083 ; GFX9-SDAG-NEXT: s_mov_b32 s11, s7
1084 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1085 ; GFX9-SDAG-NEXT: s_mov_b32 s8, s2
1086 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s3
1087 ; GFX9-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
1088 ; GFX9-SDAG-NEXT: s_mov_b32 s4, s0
1089 ; GFX9-SDAG-NEXT: s_mov_b32 s5, s1
1090 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1091 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1092 ; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
1093 ; GFX9-SDAG-NEXT: s_endpgm
1095 ; GFX9-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32:
1096 ; GFX9-GISEL: ; %bb.0: ; %entry
1097 ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1098 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1099 ; GFX9-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
1100 ; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
1101 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1102 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
1103 ; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
1104 ; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
1105 ; GFX9-GISEL-NEXT: s_endpgm
1107 ; GFX11-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32:
1108 ; GFX11-SDAG: ; %bb.0: ; %entry
1109 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
1110 ; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
1111 ; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
1112 ; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
1113 ; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
1114 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1115 ; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
1116 ; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
1117 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
1118 ; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0
1119 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
1120 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1121 ; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1122 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1123 ; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1124 ; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
1125 ; GFX11-SDAG-NEXT: s_nop 0
1126 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1127 ; GFX11-SDAG-NEXT: s_endpgm
1129 ; GFX11-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32:
1130 ; GFX11-GISEL: ; %bb.0: ; %entry
1131 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
1132 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1133 ; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0
1134 ; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
1135 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1136 ; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
1137 ; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
1138 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
1139 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1140 ; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
1141 ; GFX11-GISEL-NEXT: s_nop 0
1142 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1143 ; GFX11-GISEL-NEXT: s_endpgm
1144 ptr addrspace(1) %r,
1145 ptr addrspace(1) %a) #0 {
1147 %a.val = load float, ptr addrspace(1) %a
1148 %r.val = fptrunc float %a.val to half
1149 %r.i16 = bitcast half %r.val to i16
1150 %zext = zext i16 %r.i16 to i32
1151 store i32 %zext, ptr addrspace(1) %r
; fptrunc_fabs_f32_to_f16_zext_i32: loads a float through %a, applies
; llvm.fabs.f32, truncates to half, bitcasts the half to i16, zero-extends
; that to i32 and stores the i32 through %r.
; The assertions for every run configuration expect the fabs to appear as a
; |src| operand modifier on v_cvt_f16_f32_e64. In the default, fiji and gfx900
; runs the converted value is stored as a dword with no masking instruction.
; The gfx1100 assertions additionally expect `v_and_b32 v0, 0xffff, v0`
; before the store.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
1155 define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
1156 ; SI-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1157 ; SI-SDAG: ; %bb.0: ; %entry
1158 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
1159 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
1160 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
1161 ; SI-SDAG-NEXT: s_mov_b32 s10, s6
1162 ; SI-SDAG-NEXT: s_mov_b32 s11, s7
1163 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1164 ; SI-SDAG-NEXT: s_mov_b32 s8, s2
1165 ; SI-SDAG-NEXT: s_mov_b32 s9, s3
1166 ; SI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
1167 ; SI-SDAG-NEXT: s_mov_b32 s4, s0
1168 ; SI-SDAG-NEXT: s_mov_b32 s5, s1
1169 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1170 ; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0|
1171 ; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
1172 ; SI-SDAG-NEXT: s_endpgm
1174 ; SI-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1175 ; SI-GISEL: ; %bb.0: ; %entry
1176 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
1177 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1178 ; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0
1179 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
1180 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1181 ; SI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s3|
1182 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
1183 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
1184 ; SI-GISEL-NEXT: s_endpgm
1186 ; VI-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1187 ; VI-SDAG: ; %bb.0: ; %entry
1188 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1189 ; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
1190 ; VI-SDAG-NEXT: s_mov_b32 s6, -1
1191 ; VI-SDAG-NEXT: s_mov_b32 s10, s6
1192 ; VI-SDAG-NEXT: s_mov_b32 s11, s7
1193 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1194 ; VI-SDAG-NEXT: s_mov_b32 s8, s2
1195 ; VI-SDAG-NEXT: s_mov_b32 s9, s3
1196 ; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
1197 ; VI-SDAG-NEXT: s_mov_b32 s4, s0
1198 ; VI-SDAG-NEXT: s_mov_b32 s5, s1
1199 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1200 ; VI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0|
1201 ; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
1202 ; VI-SDAG-NEXT: s_endpgm
1204 ; VI-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1205 ; VI-GISEL: ; %bb.0: ; %entry
1206 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1207 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1208 ; VI-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
1209 ; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
1210 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1211 ; VI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2|
1212 ; VI-GISEL-NEXT: s_mov_b32 s2, -1
1213 ; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
1214 ; VI-GISEL-NEXT: s_endpgm
1216 ; GFX9-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1217 ; GFX9-SDAG: ; %bb.0: ; %entry
1218 ; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1219 ; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
1220 ; GFX9-SDAG-NEXT: s_mov_b32 s6, -1
1221 ; GFX9-SDAG-NEXT: s_mov_b32 s10, s6
1222 ; GFX9-SDAG-NEXT: s_mov_b32 s11, s7
1223 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1224 ; GFX9-SDAG-NEXT: s_mov_b32 s8, s2
1225 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s3
1226 ; GFX9-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
1227 ; GFX9-SDAG-NEXT: s_mov_b32 s4, s0
1228 ; GFX9-SDAG-NEXT: s_mov_b32 s5, s1
1229 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1230 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0|
1231 ; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
1232 ; GFX9-SDAG-NEXT: s_endpgm
1234 ; GFX9-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1235 ; GFX9-GISEL: ; %bb.0: ; %entry
1236 ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1237 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1238 ; GFX9-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
1239 ; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
1240 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1241 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2|
1242 ; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
1243 ; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
1244 ; GFX9-GISEL-NEXT: s_endpgm
1246 ; GFX11-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1247 ; GFX11-SDAG: ; %bb.0: ; %entry
1248 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
1249 ; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
1250 ; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
1251 ; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
1252 ; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
1253 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1254 ; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
1255 ; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
1256 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
1257 ; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0
1258 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
1259 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1260 ; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0|
1261 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1262 ; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1263 ; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
1264 ; GFX11-SDAG-NEXT: s_nop 0
1265 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1266 ; GFX11-SDAG-NEXT: s_endpgm
1268 ; GFX11-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1269 ; GFX11-GISEL: ; %bb.0: ; %entry
1270 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
1271 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1272 ; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0
1273 ; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
1274 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1275 ; GFX11-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2|
1276 ; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
1277 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
1278 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1279 ; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
1280 ; GFX11-GISEL-NEXT: s_nop 0
1281 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1282 ; GFX11-GISEL-NEXT: s_endpgm
1283 ptr addrspace(1) %r,
1284 ptr addrspace(1) %a) #0 {
1286 %a.val = load float, ptr addrspace(1) %a
1287 %a.fabs = call float @llvm.fabs.f32(float %a.val)
1288 %r.val = fptrunc float %a.fabs to half
1289 %r.i16 = bitcast half %r.val to i16
1290 %zext = zext i16 %r.i16 to i32
1291 store i32 %zext, ptr addrspace(1) %r
; fptrunc_f32_to_f16_sext_i32: loads a float through %a, truncates it to half,
; bitcasts the half to i16, sign-extends that to i32 and stores the i32
; through %r.
; The assertions for every run configuration expect the sign extension to be
; an explicit `v_bfe_i32 v0, v0, 0, 16` placed after the v_cvt_f16_f32 and
; before the dword store.
; The extended value is named %sext so that its name matches the instruction
; that defines it. SSA value names do not appear in the generated assembly,
; so the assertions are unaffected.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
1295 define amdgpu_kernel void @fptrunc_f32_to_f16_sext_i32(
1296 ; SI-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
1297 ; SI-SDAG: ; %bb.0: ; %entry
1298 ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
1299 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
1300 ; SI-SDAG-NEXT: s_mov_b32 s6, -1
1301 ; SI-SDAG-NEXT: s_mov_b32 s10, s6
1302 ; SI-SDAG-NEXT: s_mov_b32 s11, s7
1303 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1304 ; SI-SDAG-NEXT: s_mov_b32 s8, s2
1305 ; SI-SDAG-NEXT: s_mov_b32 s9, s3
1306 ; SI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
1307 ; SI-SDAG-NEXT: s_mov_b32 s4, s0
1308 ; SI-SDAG-NEXT: s_mov_b32 s5, s1
1309 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
1310 ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1311 ; SI-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
1312 ; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
1313 ; SI-SDAG-NEXT: s_endpgm
1315 ; SI-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
1316 ; SI-GISEL: ; %bb.0: ; %entry
1317 ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
1318 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1319 ; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0
1320 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
1321 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1322 ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s3
1323 ; SI-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
1324 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
1325 ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
1326 ; SI-GISEL-NEXT: s_endpgm
1328 ; VI-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
1329 ; VI-SDAG: ; %bb.0: ; %entry
1330 ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1331 ; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
1332 ; VI-SDAG-NEXT: s_mov_b32 s6, -1
1333 ; VI-SDAG-NEXT: s_mov_b32 s10, s6
1334 ; VI-SDAG-NEXT: s_mov_b32 s11, s7
1335 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1336 ; VI-SDAG-NEXT: s_mov_b32 s8, s2
1337 ; VI-SDAG-NEXT: s_mov_b32 s9, s3
1338 ; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
1339 ; VI-SDAG-NEXT: s_mov_b32 s4, s0
1340 ; VI-SDAG-NEXT: s_mov_b32 s5, s1
1341 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
1342 ; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1343 ; VI-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
1344 ; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
1345 ; VI-SDAG-NEXT: s_endpgm
1347 ; VI-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
1348 ; VI-GISEL: ; %bb.0: ; %entry
1349 ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1350 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1351 ; VI-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
1352 ; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
1353 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1354 ; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
1355 ; VI-GISEL-NEXT: s_mov_b32 s2, -1
1356 ; VI-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
1357 ; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
1358 ; VI-GISEL-NEXT: s_endpgm
1360 ; GFX9-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
1361 ; GFX9-SDAG: ; %bb.0: ; %entry
1362 ; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1363 ; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
1364 ; GFX9-SDAG-NEXT: s_mov_b32 s6, -1
1365 ; GFX9-SDAG-NEXT: s_mov_b32 s10, s6
1366 ; GFX9-SDAG-NEXT: s_mov_b32 s11, s7
1367 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1368 ; GFX9-SDAG-NEXT: s_mov_b32 s8, s2
1369 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s3
1370 ; GFX9-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
1371 ; GFX9-SDAG-NEXT: s_mov_b32 s4, s0
1372 ; GFX9-SDAG-NEXT: s_mov_b32 s5, s1
1373 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1374 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1375 ; GFX9-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
1376 ; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
1377 ; GFX9-SDAG-NEXT: s_endpgm
1379 ; GFX9-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
1380 ; GFX9-GISEL: ; %bb.0: ; %entry
1381 ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1382 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1383 ; GFX9-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
1384 ; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
1385 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1386 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
1387 ; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
1388 ; GFX9-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
1389 ; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
1390 ; GFX9-GISEL-NEXT: s_endpgm
1392 ; GFX11-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
1393 ; GFX11-SDAG: ; %bb.0: ; %entry
1394 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
1395 ; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
1396 ; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
1397 ; GFX11-SDAG-NEXT: s_mov_b32 s10, s6
1398 ; GFX11-SDAG-NEXT: s_mov_b32 s11, s7
1399 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1400 ; GFX11-SDAG-NEXT: s_mov_b32 s8, s2
1401 ; GFX11-SDAG-NEXT: s_mov_b32 s9, s3
1402 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
1403 ; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0
1404 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
1405 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1406 ; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1407 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1408 ; GFX11-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
1409 ; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
1410 ; GFX11-SDAG-NEXT: s_nop 0
1411 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1412 ; GFX11-SDAG-NEXT: s_endpgm
1414 ; GFX11-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
1415 ; GFX11-GISEL: ; %bb.0: ; %entry
1416 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
1417 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1418 ; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0
1419 ; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
1420 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1421 ; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
1422 ; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
1423 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
1424 ; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
1425 ; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
1426 ; GFX11-GISEL-NEXT: s_nop 0
1427 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1428 ; GFX11-GISEL-NEXT: s_endpgm
1429 ptr addrspace(1) %r,
1430 ptr addrspace(1) %a) #0 {
1432 %a.val = load float, ptr addrspace(1) %a
1433 %r.val = fptrunc float %a.val to half
1434 %r.i16 = bitcast half %r.val to i16
1435 %sext = sext i16 %r.i16 to i32
1436 store i32 %sext, ptr addrspace(1) %r
; Declaration of the fabs intrinsic called by the fabs test kernels in this
; file; it carries attribute group #1.
1440 declare float @llvm.fabs.f32(float) #1
; Attribute group #0 is attached to the kernel definitions whose signature
; ends in `#0`; group #1 is attached to the intrinsic declaration above.
1442 attributes #0 = { nounwind }
1443 attributes #1 = { nounwind readnone }