1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s
3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX8 %s
4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX9 %s
5 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
6 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
; Constrained fptrunc of a scalar float to half with round.tonearest and
; fpexcept.strict. The SI checks expect the v_cvt_f16_f32 result to be masked
; to its low 16 bits and converted back with v_cvt_f32_f16; the GFX89 and
; GFX1011 checks expect a single v_cvt_f16_f32.
8 define half @v_constrained_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #0 {
9 ; SI-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
11 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
13 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
14 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
15 ; SI-NEXT: s_setpc_b64 s[30:31]
17 ; GFX89-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
19 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20 ; GFX89-NEXT: v_cvt_f16_f32_e32 v0, v0
21 ; GFX89-NEXT: s_setpc_b64 s[30:31]
23 ; GFX1011-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
25 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26 ; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
27 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
28 %val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Constrained fptrunc of <2 x float> to <2 x half> (round.tonearest,
; fpexcept.strict). Expected lowering per check prefix:
;  - SI      each element is converted, masked to 16 bits and converted back
;            to f32 in its own register (v0, v1).
;  - GFX8    the second element is written to the high word via SDWA
;            (dst_sel WORD_1) and merged into v0 with v_or_b32.
;  - GFX9    the two halves are packed with v_perm_b32, selector 0x5040100
;            materialized in s4.
;  - GFX1011 same v_perm_b32 packing, with the selector as a literal operand.
32 define <2 x half> @v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict(<2 x float> %arg) #0 {
33 ; SI-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
35 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
37 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
38 ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
39 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
40 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
41 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
42 ; SI-NEXT: s_setpc_b64 s[30:31]
44 ; GFX8-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
46 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
48 ; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
49 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
50 ; GFX8-NEXT: s_setpc_b64 s[30:31]
52 ; GFX9-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
54 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
56 ; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
57 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
58 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
59 ; GFX9-NEXT: s_setpc_b64 s[30:31]
61 ; GFX1011-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
63 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64 ; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
65 ; GFX1011-NEXT: v_cvt_f16_f32_e32 v1, v1
66 ; GFX1011-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
67 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
68 %val = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Constrained fptrunc of <3 x float> to <3 x half> (round.tonearest,
; fpexcept.strict). On SI the checks expect three independent
; convert / mask / convert-back sequences. On the other targets the first two
; elements are packed into v0 (SDWA + v_or_b32 on GFX8, v_perm_b32 on GFX9 and
; GFX1011) and the third element is converted on its own into v1.
72 define <3 x half> @v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict(<3 x float> %arg) #0 {
73 ; SI-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
75 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
77 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
78 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
79 ; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
80 ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
81 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
82 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
83 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
84 ; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
85 ; SI-NEXT: s_setpc_b64 s[30:31]
87 ; GFX8-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
89 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90 ; GFX8-NEXT: v_cvt_f16_f32_sdwa v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
91 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
92 ; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v2
93 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
94 ; GFX8-NEXT: s_setpc_b64 s[30:31]
96 ; GFX9-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
98 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
100 ; GFX9-NEXT: v_cvt_f16_f32_e32 v3, v1
101 ; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
102 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
103 ; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
104 ; GFX9-NEXT: s_setpc_b64 s[30:31]
106 ; GFX1011-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
108 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109 ; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
110 ; GFX1011-NEXT: v_cvt_f16_f32_e32 v1, v1
111 ; GFX1011-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
112 ; GFX1011-NEXT: v_cvt_f16_f32_e32 v1, v2
113 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
114 %val = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Constrained fptrunc of a scalar double to float (round.tonearest,
; fpexcept.strict). All RUN lines share the common GCN prefix here; the checks
; expect a single v_cvt_f32_f64 reading the v[0:1] register pair.
118 define float @v_constrained_fptrunc_f64_to_f32_fpexcept_strict(double %arg) #0 {
119 ; GCN-LABEL: v_constrained_fptrunc_f64_to_f32_fpexcept_strict:
121 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122 ; GCN-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
123 ; GCN-NEXT: s_setpc_b64 s[30:31]
124 %val = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Constrained fptrunc of <2 x double> to <2 x float> (round.tonearest,
; fpexcept.strict). The shared GCN checks expect one v_cvt_f32_f64 per
; element, reading v[0:1] and v[2:3] and writing v0 and v1.
128 define <2 x float> @v_constrained_fptrunc_v2f64_to_v2f32_fpexcept_strict(<2 x double> %arg) #0 {
129 ; GCN-LABEL: v_constrained_fptrunc_v2f64_to_v2f32_fpexcept_strict:
131 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132 ; GCN-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
133 ; GCN-NEXT: v_cvt_f32_f64_e32 v1, v[2:3]
134 ; GCN-NEXT: s_setpc_b64 s[30:31]
135 %val = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Constrained fptrunc of <3 x double> to <3 x float> (round.tonearest,
; fpexcept.strict). The shared GCN checks expect one v_cvt_f32_f64 per
; element, reading v[0:1], v[2:3] and v[4:5] and writing v0, v1 and v2.
139 define <3 x float> @v_constrained_fptrunc_v3f64_to_v3f32_fpexcept_strict(<3 x double> %arg) #0 {
140 ; GCN-LABEL: v_constrained_fptrunc_v3f64_to_v3f32_fpexcept_strict:
142 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143 ; GCN-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
144 ; GCN-NEXT: v_cvt_f32_f64_e32 v1, v[2:3]
145 ; GCN-NEXT: v_cvt_f32_f64_e32 v2, v[4:5]
146 ; GCN-NEXT: s_setpc_b64 s[30:31]
147 %val = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
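; NOTE(review): the f64 -> f16 (scalar, v2, v3) tests below are commented out,
; while the matching intrinsic declarations are still present at the end of
; this file. Presumably they are disabled because of a codegen limitation;
; confirm before re-enabling and regenerate the checks when doing so.
152 ; define half @v_constrained_fptrunc_f64_to_f16_fpexcept_strict(double %arg) #0 {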
153 ; %val = call half @llvm.experimental.constrained.fptrunc.f16.f64(double %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
157 ; define <2 x half> @v_constrained_fptrunc_v2f64_to_v2f16_fpexcept_strict(<2 x double> %arg) #0 {
158 ; %val = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
159 ; ret <2 x half> %val
162 ; define <3 x half> @v_constrained_fptrunc_v3f64_to_v3f16_fpexcept_strict(<3 x double> %arg) #0 {
163 ; %val = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
164 ; ret <3 x half> %val
; fneg applied to the RESULT of a strict f32 -> f16 constrained fptrunc.
; The checks expect the negation to remain a separate v_xor_b32 after the
; conversion: with 0x80000000 on SI (after the value has been converted back
; with v_cvt_f32_f16) and with 0x8000 on the GFX89 and GFX1011 targets.
167 define half @v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #0 {
168 ; SI-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
170 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
172 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
173 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
174 ; SI-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
175 ; SI-NEXT: s_setpc_b64 s[30:31]
177 ; GFX89-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
179 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180 ; GFX89-NEXT: v_cvt_f16_f32_e32 v0, v0
181 ; GFX89-NEXT: v_xor_b32_e32 v0, 0x8000, v0
182 ; GFX89-NEXT: s_setpc_b64 s[30:31]
184 ; GFX1011-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
186 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187 ; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
188 ; GFX1011-NEXT: v_xor_b32_e32 v0, 0x8000, v0
189 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
190 %val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
191 %neg.val = fneg half %val
; fneg applied to the INPUT of a strict f32 -> f16 constrained fptrunc.
; The checks expect the negation to be folded into the conversion as a source
; modifier (-v0 on the e64 encoding of v_cvt_f16_f32) on every target; SI
; additionally keeps its mask and v_cvt_f32_f16 sequence.
195 define half @v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict(float %arg) #0 {
196 ; SI-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
198 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199 ; SI-NEXT: v_cvt_f16_f32_e64 v0, -v0
200 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
201 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
202 ; SI-NEXT: s_setpc_b64 s[30:31]
204 ; GFX89-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
206 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
207 ; GFX89-NEXT: v_cvt_f16_f32_e64 v0, -v0
208 ; GFX89-NEXT: s_setpc_b64 s[30:31]
210 ; GFX1011-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
212 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
213 ; GFX1011-NEXT: v_cvt_f16_f32_e64 v0, -v0
214 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
215 %neg.arg = fneg float %arg
216 %val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %neg.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; fneg applied to the RESULT of a strict f64 -> f32 constrained fptrunc.
; The shared GCN checks expect the negation to stay a separate v_xor_b32 with
; 0x80000000 following the v_cvt_f32_f64.
220 define float @v_constrained_fneg_fptrunc_f64_to_f32_fpexcept_strict(double %arg) #0 {
221 ; GCN-LABEL: v_constrained_fneg_fptrunc_f64_to_f32_fpexcept_strict:
223 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224 ; GCN-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
225 ; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
226 ; GCN-NEXT: s_setpc_b64 s[30:31]
227 %val = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
228 %neg.val = fneg float %val
; fneg applied to the INPUT of a strict f64 -> f32 constrained fptrunc.
; The shared GCN checks expect the negation to be folded into the conversion
; as a source modifier (-v[0:1] on the e64 encoding of v_cvt_f32_f64).
232 define float @v_constrained_fptrunc_fneg_f64_to_f32_fpexcept_strict(double %arg) #0 {
233 ; GCN-LABEL: v_constrained_fptrunc_fneg_f64_to_f32_fpexcept_strict:
235 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236 ; GCN-NEXT: v_cvt_f32_f64_e64 v0, -v[0:1]
237 ; GCN-NEXT: s_setpc_b64 s[30:31]
238 %neg.arg = fneg double %arg
239 %val = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %neg.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict f32 -> f16 constrained fptrunc in a void function, so the half value
; is not part of the return ABI. The checks still expect the conversion to be
; emitted on every target.
; NOTE(review): no store of %result through %ptr is visible here and the
; checks contain no store instruction, so %ptr looks unused -- confirm this is
; intentional (the v2f16 "noabi" variant does store its result).
243 define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi(float %arg, ptr addrspace(1) %ptr) #0 {
244 ; SI-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi:
246 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
247 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
248 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
249 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
250 ; SI-NEXT: s_setpc_b64 s[30:31]
252 ; GFX89-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi:
254 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
255 ; GFX89-NEXT: v_cvt_f16_f32_e32 v0, v0
256 ; GFX89-NEXT: s_setpc_b64 s[30:31]
258 ; GFX1011-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi:
260 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
261 ; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
262 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
263 %result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Strict <2 x float> -> <2 x half> constrained fptrunc whose result is stored
; to the addrspace(1) pointer %ptr instead of being returned. Expected
; lowering per check prefix:
;  - SI     convert, mask, convert back to f32, convert to f16 again, then
;           pack with a 16-bit shift + v_or_b32 and buffer_store_dword (addr64).
;  - GFX8   SDWA high-word write + v_or_b32, then flat_store_dword.
;  - GFX9   v_perm_b32 packing (selector in s4), then global_store_dword.
;  - GFX10  v_perm_b32 with literal selector, then global_store_dword.
;  - GFX11  same packing, but the store is spelled global_store_b32; this is
;           why GFX10 and GFX11 use separate prefixes in this function.
; The SI, GFX8 and GFX9 checks have an s_waitcnt after the store; the GFX10
; and GFX11 checks do not.
267 define void @v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi(<2 x float> %arg, ptr addrspace(1) %ptr) #0 {
268 ; SI-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi:
270 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
271 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
272 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
273 ; SI-NEXT: s_mov_b32 s6, 0
274 ; SI-NEXT: s_mov_b32 s7, 0xf000
275 ; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
276 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
277 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
278 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
279 ; SI-NEXT: s_mov_b32 s4, s6
280 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
281 ; SI-NEXT: s_mov_b32 s5, s6
282 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
283 ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
284 ; SI-NEXT: v_or_b32_e32 v0, v0, v1
285 ; SI-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
286 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
287 ; SI-NEXT: s_setpc_b64 s[30:31]
289 ; GFX8-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi:
291 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
293 ; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
294 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
295 ; GFX8-NEXT: flat_store_dword v[2:3], v0
296 ; GFX8-NEXT: s_waitcnt vmcnt(0)
297 ; GFX8-NEXT: s_setpc_b64 s[30:31]
299 ; GFX9-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi:
301 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
302 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
303 ; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
304 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
305 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
306 ; GFX9-NEXT: global_store_dword v[2:3], v0, off
307 ; GFX9-NEXT: s_waitcnt vmcnt(0)
308 ; GFX9-NEXT: s_setpc_b64 s[30:31]
310 ; GFX10-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi:
312 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
313 ; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
314 ; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v1
315 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
316 ; GFX10-NEXT: global_store_dword v[2:3], v0, off
317 ; GFX10-NEXT: s_setpc_b64 s[30:31]
319 ; GFX11-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict_noabi:
321 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322 ; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
323 ; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
324 ; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
325 ; GFX11-NEXT: global_store_b32 v[2:3], v0, off
326 ; GFX11-NEXT: s_setpc_b64 s[30:31]
327 %result = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
328 store <2 x half> %result, ptr addrspace(1) %ptr
; Void-returning variant with fneg applied to the input of the strict
; f32 -> f16 constrained fptrunc. The checks expect the negation to be folded
; as a -v0 source modifier on the e64 conversion for every target.
; NOTE(review): no store of %result through %ptr is visible here and the
; checks contain no store instruction -- confirm %ptr is intentionally unused.
332 define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg(float %arg, ptr addrspace(1) %ptr) #0 {
333 ; SI-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg:
335 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
336 ; SI-NEXT: v_cvt_f16_f32_e64 v0, -v0
337 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
338 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
339 ; SI-NEXT: s_setpc_b64 s[30:31]
341 ; GFX89-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg:
343 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
344 ; GFX89-NEXT: v_cvt_f16_f32_e64 v0, -v0
345 ; GFX89-NEXT: s_setpc_b64 s[30:31]
347 ; GFX1011-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fneg:
349 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
350 ; GFX1011-NEXT: v_cvt_f16_f32_e64 v0, -v0
351 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
352 %neg.arg = fneg float %arg
353 %result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %neg.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Void-returning variant with llvm.fabs.f32 applied to the input of the strict
; f32 -> f16 constrained fptrunc. The checks expect the absolute value to be
; folded as a |v0| source modifier on the e64 conversion for every target.
; NOTE(review): no store of %result through %ptr is visible here and the
; checks contain no store instruction -- confirm %ptr is intentionally unused.
357 define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs(float %arg, ptr addrspace(1) %ptr) #0 {
358 ; SI-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs:
360 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
361 ; SI-NEXT: v_cvt_f16_f32_e64 v0, |v0|
362 ; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
363 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
364 ; SI-NEXT: s_setpc_b64 s[30:31]
366 ; GFX89-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs:
368 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369 ; GFX89-NEXT: v_cvt_f16_f32_e64 v0, |v0|
370 ; GFX89-NEXT: s_setpc_b64 s[30:31]
372 ; GFX1011-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs:
374 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
375 ; GFX1011-NEXT: v_cvt_f16_f32_e64 v0, |v0|
376 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
377 %abs.arg = call float @llvm.fabs.f32(float %arg) #0
378 %result = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %abs.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Intrinsic declarations used by the tests in this file, grouped by
; conversion: f32 -> f16, f64 -> f32, f64 -> f16, followed by llvm.fabs.f32.
; The f64 -> f16 declarations are only referenced by the commented-out tests
; in this file. Attribute group #0 marks the test functions (and the fabs call
; site) as strictfp.
382 declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
383 declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata)
384 declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata)
386 declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
387 declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
388 declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
390 declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
391 declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata)
392 declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata)
394 declare float @llvm.fabs.f32(float)
396 attributes #0 = { strictfp }