1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; XUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s
3 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX8 %s
4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX9 %s
5 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
6 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
; Scalar float -> half constrained fptrunc using round.tonearest and
; fpexcept.strict. The checks use the shared GCN prefix and expect a single
; v_cvt_f16_f32 between the entry waitcnt and the return.
8 define half @v_constrained_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #0 {
9 ; GCN-LABEL: v_constrained_fptrunc_f32_to_f16_fpexcept_strict:
11 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
13 ; GCN-NEXT: s_setpc_b64 s[30:31]
14 %val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; <2 x float> -> <2 x half> constrained fptrunc (round.tonearest,
; fpexcept.strict). Each element is converted on its own and the two halves are
; packed into v0. The packing differs per check prefix below -
;   GFX8 checks    - SDWA convert into WORD_1 of v1, then v_or with v0
;   GFX9 checks    - v_perm_b32 with selector 0x5040100 held in s4
;   GFX1011 checks - v_perm_b32 with 0x5040100 as a literal operand
18 define <2 x half> @v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict(<2 x float> %arg) #0 {
19 ; GFX8-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
21 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
23 ; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
24 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
25 ; GFX8-NEXT: s_setpc_b64 s[30:31]
27 ; GFX9-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
29 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
31 ; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
32 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
33 ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
34 ; GFX9-NEXT: s_setpc_b64 s[30:31]
36 ; GFX1011-LABEL: v_constrained_fptrunc_v2f32_to_v2f16_fpexcept_strict:
38 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39 ; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
40 ; GFX1011-NEXT: v_cvt_f16_f32_e32 v1, v1
41 ; GFX1011-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
42 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
43 %val = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; <3 x float> -> <3 x half> constrained fptrunc (round.tonearest,
; fpexcept.strict). The checks expect three separate conversions. Elements 0
; and 1 are packed into v0 (SDWA + v_or under the GFX8 checks, v_perm_b32 under
; the GFX9 and GFX1011 checks) and element 2 is converted from v2 into v1.
47 define <3 x half> @v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict(<3 x float> %arg) #0 {
48 ; GFX8-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
50 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51 ; GFX8-NEXT: v_cvt_f16_f32_sdwa v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
52 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
53 ; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v2
54 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
55 ; GFX8-NEXT: s_setpc_b64 s[30:31]
57 ; GFX9-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
59 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
61 ; GFX9-NEXT: v_cvt_f16_f32_e32 v3, v1
62 ; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
63 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100
64 ; GFX9-NEXT: v_perm_b32 v0, v3, v0, s4
65 ; GFX9-NEXT: s_setpc_b64 s[30:31]
67 ; GFX1011-LABEL: v_constrained_fptrunc_v3f32_to_v3f16_fpexcept_strict:
69 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70 ; GFX1011-NEXT: v_cvt_f16_f32_e32 v0, v0
71 ; GFX1011-NEXT: v_cvt_f16_f32_e32 v1, v1
72 ; GFX1011-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
73 ; GFX1011-NEXT: v_cvt_f16_f32_e32 v1, v2
74 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
75 %val = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Scalar double -> float constrained fptrunc (round.tonearest,
; fpexcept.strict). The shared GCN checks expect one v_cvt_f32_f64 reading the
; 64-bit register pair v[0:1] and writing v0.
79 define float @v_constrained_fptrunc_f64_to_f32_fpexcept_strict(double %arg) #0 {
80 ; GCN-LABEL: v_constrained_fptrunc_f64_to_f32_fpexcept_strict:
82 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83 ; GCN-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
84 ; GCN-NEXT: s_setpc_b64 s[30:31]
85 %val = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; <2 x double> -> <2 x float> constrained fptrunc (round.tonearest,
; fpexcept.strict). The shared GCN checks expect one v_cvt_f32_f64 per element,
; reading v[0:1] and v[2:3] and writing v0 and v1.
89 define <2 x float> @v_constrained_fptrunc_v2f64_to_v2f32_fpexcept_strict(<2 x double> %arg) #0 {
90 ; GCN-LABEL: v_constrained_fptrunc_v2f64_to_v2f32_fpexcept_strict:
92 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93 ; GCN-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
94 ; GCN-NEXT: v_cvt_f32_f64_e32 v1, v[2:3]
95 ; GCN-NEXT: s_setpc_b64 s[30:31]
96 %val = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; <3 x double> -> <3 x float> constrained fptrunc (round.tonearest,
; fpexcept.strict). The shared GCN checks expect one v_cvt_f32_f64 per element,
; reading v[0:1], v[2:3] and v[4:5] and writing v0, v1 and v2.
100 define <3 x float> @v_constrained_fptrunc_v3f64_to_v3f32_fpexcept_strict(<3 x double> %arg) #0 {
101 ; GCN-LABEL: v_constrained_fptrunc_v3f64_to_v3f32_fpexcept_strict:
103 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104 ; GCN-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
105 ; GCN-NEXT: v_cvt_f32_f64_e32 v1, v[2:3]
106 ; GCN-NEXT: v_cvt_f32_f64_e32 v2, v[4:5]
107 ; GCN-NEXT: s_setpc_b64 s[30:31]
108 %val = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
113 ; define half @v_constrained_fptrunc_f64_to_f16_fpexcept_strict(double %arg) #0 {
114 ; %val = call half @llvm.experimental.constrained.fptrunc.f16.f64(double %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
118 ; define <2 x half> @v_constrained_fptrunc_v2f64_to_v2f16_fpexcept_strict(<2 x double> %arg) #0 {
119 ; %val = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
120 ; ret <2 x half> %val
123 ; define <3 x half> @v_constrained_fptrunc_v3f64_to_v3f16_fpexcept_strict(<3 x double> %arg) #0 {
124 ; %val = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
125 ; ret <3 x half> %val
; fneg applied to the RESULT of a strict float -> half fptrunc. The shared GCN
; checks expect the negation to remain a separate instruction after the
; convert, namely a v_xor with 0x8000 (the half sign bit).
128 define half @v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict(float %arg) #0 {
129 ; GCN-LABEL: v_constrained_fneg_fptrunc_f32_to_f16_fpexcept_strict:
131 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132 ; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
133 ; GCN-NEXT: v_xor_b32_e32 v0, 0x8000, v0
134 ; GCN-NEXT: s_setpc_b64 s[30:31]
135 %val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
136 %neg.val = fneg half %val
; fneg applied to the INPUT of a strict float -> half fptrunc. The shared GCN
; checks expect the negation to be folded into the convert as a source
; modifier (-v0), which selects the e64 encoding of v_cvt_f16_f32.
140 define half @v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict(float %arg) #0 {
141 ; GCN-LABEL: v_constrained_fptrunc_fneg_f32_to_f16_fpexcept_strict:
143 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
144 ; GCN-NEXT: v_cvt_f16_f32_e64 v0, -v0
145 ; GCN-NEXT: s_setpc_b64 s[30:31]
146 %neg.arg = fneg float %arg
147 %val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %neg.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; fneg applied to the RESULT of a strict double -> float fptrunc. The shared
; GCN checks expect the negation to remain a separate instruction after the
; convert, namely a v_xor with 0x80000000 (the float sign bit).
151 define float @v_constrained_fneg_fptrunc_f64_to_f32_fpexcept_strict(double %arg) #0 {
152 ; GCN-LABEL: v_constrained_fneg_fptrunc_f64_to_f32_fpexcept_strict:
154 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155 ; GCN-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
156 ; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
157 ; GCN-NEXT: s_setpc_b64 s[30:31]
158 %val = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
159 %neg.val = fneg float %val
; fneg applied to the INPUT of a strict double -> float fptrunc. The shared GCN
; checks expect the negation to be folded into the convert as a source
; modifier (-v[0:1]), which selects the e64 encoding of v_cvt_f32_f64.
163 define float @v_constrained_fptrunc_fneg_f64_to_f32_fpexcept_strict(double %arg) #0 {
164 ; GCN-LABEL: v_constrained_fptrunc_fneg_f64_to_f32_fpexcept_strict:
166 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
167 ; GCN-NEXT: v_cvt_f32_f64_e64 v0, -v[0:1]
168 ; GCN-NEXT: s_setpc_b64 s[30:31]
169 %neg.arg = fneg double %arg
170 %val = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %neg.arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Declarations of the constrained fptrunc intrinsic overloads used by this
; file. Each takes the source value plus two metadata operands (rounding mode
; and exception behavior) and carries attribute group #1.
; float -> half overloads (scalar, 2-element and 3-element vectors).
174 declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #1
175 declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #1
176 declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #1
; double -> float overloads.
178 declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) #1
179 declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) #1
180 declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata) #1
; double -> half overloads. In the visible part of this file these are only
; referenced by the commented-out f64_to_f16 functions.
182 declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata) #1
183 declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata) #1
184 declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata) #1
; Attribute group #0 is applied to every test function and marks it strictfp.
; Attribute group #1 is applied to the constrained intrinsic declarations.
186 attributes #0 = { strictfp }
187 attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
188 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: