1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
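2 ; FIXME: Missing operand promote for f16. The tahiti invocation below is presumably disabled (spelled XUN) for this reason; confirm before re-enabling it.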
3 ; XUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s
4 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX8 %s
5 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX9 %s
6 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
7 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
; Scalar half -> float extension through the constrained fpext intrinsic with
; "fpexcept.strict" exception metadata. All enabled targets share one set of
; checks: a single v_cvt_f32_f16_e32 on v0 followed by the return.
9 define float @v_constrained_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 {
10 ; GCN-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict:
12 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13 ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
14 ; GCN-NEXT: s_setpc_b64 s[30:31]
15 %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
; <2 x half> -> <2 x float> strict extension. Both halves arrive in v0.
; The GFX89 and GFX10 checks expect the upper element to be converted with the
; SDWA form reading WORD_1 of v0; the GFX11 checks instead expect an explicit
; 16-bit right shift into v1 followed by two plain e32 conversions.
19 define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict(<2 x half> %arg) #0 {
20 ; GFX89-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
22 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GFX89-NEXT: v_cvt_f32_f16_e32 v2, v0
24 ; GFX89-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
25 ; GFX89-NEXT: v_mov_b32_e32 v0, v2
26 ; GFX89-NEXT: s_setpc_b64 s[30:31]
28 ; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
30 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v0
32 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
33 ; GFX10-NEXT: v_mov_b32_e32 v0, v2
34 ; GFX10-NEXT: s_setpc_b64 s[30:31]
36 ; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
38 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
40 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
41 ; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
42 ; GFX11-NEXT: s_setpc_b64 s[30:31]
43 %result = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %arg, metadata !"fpexcept.strict")
44 ret <2 x float> %result
; <3 x half> -> <3 x float> strict extension. The first two elements are read
; from v0 and the third from v1. As in the two-element case, the GFX89 and
; GFX10 checks use the SDWA WORD_1 conversion for the second element, while the
; GFX11 checks shift v0 right by 16 first. The trailing v_mov instructions
; place the results in v0..v2.
47 define <3 x float> @v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict(<3 x half> %arg) #0 {
48 ; GFX89-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
50 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51 ; GFX89-NEXT: v_cvt_f32_f16_e32 v4, v0
52 ; GFX89-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
53 ; GFX89-NEXT: v_cvt_f32_f16_e32 v2, v1
54 ; GFX89-NEXT: v_mov_b32_e32 v0, v4
55 ; GFX89-NEXT: v_mov_b32_e32 v1, v3
56 ; GFX89-NEXT: s_setpc_b64 s[30:31]
58 ; GFX10-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
60 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61 ; GFX10-NEXT: v_cvt_f32_f16_e32 v4, v0
62 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
63 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v1
64 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
65 ; GFX10-NEXT: v_mov_b32_e32 v1, v3
66 ; GFX10-NEXT: s_setpc_b64 s[30:31]
68 ; GFX11-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
70 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
72 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
73 ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v2
74 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1
75 ; GFX11-NEXT: v_mov_b32_e32 v1, v3
76 ; GFX11-NEXT: s_setpc_b64 s[30:31]
77 %result = call <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half> %arg, metadata !"fpexcept.strict")
78 ret <3 x float> %result
; Scalar float -> double strict extension. All enabled targets share one set
; of checks: a single v_cvt_f64_f32_e32 writing the v[0:1] register pair.
81 define double @v_constrained_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 {
82 ; GCN-LABEL: v_constrained_fpext_f32_to_f64_fpexcept_strict:
84 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
86 ; GCN-NEXT: s_setpc_b64 s[30:31]
87 %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict")
; <2 x float> -> <2 x double> strict extension. The checks expect v1 to be
; copied to v2 before the first conversion, because that conversion writes
; v[0:1] and would otherwise overwrite the second input element.
91 define <2 x double> @v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict(<2 x float> %arg) #0 {
92 ; GCN-LABEL: v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict:
94 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95 ; GCN-NEXT: v_mov_b32_e32 v2, v1
96 ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
97 ; GCN-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
98 ; GCN-NEXT: s_setpc_b64 s[30:31]
99 %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float> %arg, metadata !"fpexcept.strict")
100 ret <2 x double> %result
; <3 x float> -> <3 x double> strict extension. Inputs in v1 and v2 are first
; moved out of the way (to v2 and v4) and then each element is converted into
; its own register pair. The GFX89 and GFX1011 check sequences contain the same
; instructions; they differ only in the order of the last two conversions.
103 define <3 x double> @v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict(<3 x float> %arg) #0 {
104 ; GFX89-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict:
106 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107 ; GFX89-NEXT: v_mov_b32_e32 v4, v2
108 ; GFX89-NEXT: v_mov_b32_e32 v2, v1
109 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
110 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
111 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[4:5], v4
112 ; GFX89-NEXT: s_setpc_b64 s[30:31]
114 ; GFX1011-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict:
116 ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117 ; GFX1011-NEXT: v_mov_b32_e32 v4, v2
118 ; GFX1011-NEXT: v_mov_b32_e32 v2, v1
119 ; GFX1011-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
120 ; GFX1011-NEXT: v_cvt_f64_f32_e32 v[4:5], v4
121 ; GFX1011-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
122 ; GFX1011-NEXT: s_setpc_b64 s[30:31]
123 %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float> %arg, metadata !"fpexcept.strict")
124 ret <3 x double> %result
; Scalar half -> double strict extension. The checks expect the conversion to
; be done in two steps: half to float in v0, then float to double in v[0:1].
127 define double @v_constrained_fpext_f16_to_f64_fpexcept_strict(half %arg) #0 {
128 ; GCN-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict:
130 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
131 ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
132 ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
133 ; GCN-NEXT: s_setpc_b64 s[30:31]
134 %result = call double @llvm.experimental.constrained.fpext.f64.f16(half %arg, metadata !"fpexcept.strict")
; <2 x half> -> <2 x double> strict extension. Every check sequence first
; widens each element to float and then to double. The GFX89 and GFX10 checks
; read the upper half with the SDWA WORD_1 conversion; the GFX11 checks shift
; v0 right by 16 and use a plain e32 conversion instead.
138 define <2 x double> @v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict(<2 x half> %arg) #0 {
139 ; GFX89-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
141 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142 ; GFX89-NEXT: v_cvt_f32_f16_e32 v1, v0
143 ; GFX89-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
144 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v1
145 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
146 ; GFX89-NEXT: s_setpc_b64 s[30:31]
148 ; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
150 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151 ; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v0
152 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
153 ; GFX10-NEXT: v_cvt_f64_f32_e32 v[0:1], v1
154 ; GFX10-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
155 ; GFX10-NEXT: s_setpc_b64 s[30:31]
157 ; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
159 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
160 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
161 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
162 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v1
163 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
164 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
165 ; GFX11-NEXT: s_setpc_b64 s[30:31]
166 %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half> %arg, metadata !"fpexcept.strict")
167 ret <2 x double> %result
; <3 x half> -> <3 x double> strict extension. Each element is widened to
; float and then to double, producing the register pairs v[0:1], v[2:3] and
; v[4:5].
; NOTE(review): the function name says "v2f64", but the return type and the
; intrinsic used are <3 x double> (v3f64). The name looks like a typo; renaming
; it would also require updating every LABEL line for this function.
170 define <3 x double> @v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict(<3 x half> %arg) #0 {
171 ; GFX89-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
173 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174 ; GFX89-NEXT: v_cvt_f32_f16_e32 v2, v0
175 ; GFX89-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
176 ; GFX89-NEXT: v_cvt_f32_f16_e32 v4, v1
177 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[0:1], v2
178 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[2:3], v3
179 ; GFX89-NEXT: v_cvt_f64_f32_e32 v[4:5], v4
180 ; GFX89-NEXT: s_setpc_b64 s[30:31]
182 ; GFX10-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
184 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185 ; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v0
186 ; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
187 ; GFX10-NEXT: v_cvt_f32_f16_e32 v4, v1
188 ; GFX10-NEXT: v_cvt_f64_f32_e32 v[0:1], v2
189 ; GFX10-NEXT: v_cvt_f64_f32_e32 v[2:3], v3
190 ; GFX10-NEXT: v_cvt_f64_f32_e32 v[4:5], v4
191 ; GFX10-NEXT: s_setpc_b64 s[30:31]
193 ; GFX11-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
195 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
197 ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
198 ; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v1
199 ; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v2
200 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
201 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[4:5], v3
202 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[2:3], v2
203 ; GFX11-NEXT: s_setpc_b64 s[30:31]
204 %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half> %arg, metadata !"fpexcept.strict")
205 ret <3 x double> %result
; fneg applied to the RESULT of a strict half -> float extension. The checks
; expect the negation to stay a separate instruction after the conversion:
; an xor of v0 with 0x80000000.
208 define float @v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 {
209 ; GCN-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict:
211 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
213 ; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
214 ; GCN-NEXT: s_setpc_b64 s[30:31]
215 %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
216 %neg.result = fneg float %result
217 ret float %neg.result
; Strict half -> float extension of a NEGATED operand. The checks expect the
; negation to be folded into the conversion: the e64 encoding is used with a
; negated source operand (-v0) and no separate negate instruction.
220 define float @v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict(half %arg) #0 {
221 ; GCN-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict:
223 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224 ; GCN-NEXT: v_cvt_f32_f16_e64 v0, -v0
225 ; GCN-NEXT: s_setpc_b64 s[30:31]
226 %neg.arg = fneg half %arg
227 %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %neg.arg, metadata !"fpexcept.strict")
; Strict float -> double extension of a NEGATED operand. The intrinsic must be
; fed %neg.arg; passing %arg would leave the fneg dead and turn this into a
; duplicate of the plain f32 -> f64 test.
; The conversion check below expects the negation to be folded into the
; instruction as a negated source operand, as in the f16 variant of this test.
; NOTE(review): the assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py to confirm the exact expected output.
231 define double @v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict(float %arg) #0 {
232 ; GCN-LABEL: v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict:
234 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
235 ; GCN-NEXT: v_cvt_f64_f32_e64 v[0:1], -v0
236 ; GCN-NEXT: s_setpc_b64 s[30:31]
237 %neg.arg = fneg float %arg
238 %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %neg.arg, metadata !"fpexcept.strict")
; fneg applied to the RESULT of a strict float -> double extension. The checks
; expect the conversion into v[0:1] followed by a separate xor of v1 (the
; second register of the result pair) with 0x80000000.
242 define double @v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 {
243 ; GCN-LABEL: v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict:
245 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
246 ; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0
247 ; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
248 ; GCN-NEXT: s_setpc_b64 s[30:31]
249 %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict")
250 %neg.result = fneg double %result
251 ret double %neg.result
; Declarations of the constrained fpext intrinsics called by the tests in this
; file, grouped by conversion: float -> double, half -> double, half -> float.
; Each takes the value to extend plus an exception-behavior metadata operand.
254 declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) #1
255 declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) #1
256 declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata) #1
258 declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata) #1
259 declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata) #1
260 declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half>, metadata) #1
262 declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #1
263 declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #1
264 declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #1
; Attribute group #0 is attached to every test function above (strictfp);
; attribute group #1 is attached to the intrinsic declarations.
266 attributes #0 = { strictfp }
267 attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
268 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: