1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; Each subtarget is compiled twice: with SelectionDAG (-global-isel=0) and with
; GlobalISel (-global-isel=1). The shared GCN/GFX* prefixes are used where both
; selectors are expected to agree; the -SDAG/-GISEL prefixes where they differ.
; -mtriple is used instead of -march so the whole target triple is pinned and the
; test does not inherit vendor/OS components from the host's default triple.
2 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-SDAG %s
3 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
4 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
5 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s
7 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-GISEL %s
8 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
9 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
10 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s
; Scalar f32 ldexp with an i32 exponent. Every checked target is expected to
; select a single v_ldexp_f32. The leading %out pointer argument is not used by
; the visible call, which is why the operands are read from v2 and v3.
12 define float @test_ldexp_f32_i32(ptr addrspace(1) %out, float %a, i32 %b) {
13 ; GFX6-LABEL: test_ldexp_f32_i32:
15 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16 ; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v3
17 ; GFX6-NEXT: s_setpc_b64 s[30:31]
19 ; GFX8-LABEL: test_ldexp_f32_i32:
21 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX8-NEXT: v_ldexp_f32 v0, v2, v3
23 ; GFX8-NEXT: s_setpc_b64 s[30:31]
25 ; GFX9-LABEL: test_ldexp_f32_i32:
27 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28 ; GFX9-NEXT: v_ldexp_f32 v0, v2, v3
29 ; GFX9-NEXT: s_setpc_b64 s[30:31]
31 ; GFX11-LABEL: test_ldexp_f32_i32:
33 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34 ; GFX11-NEXT: v_ldexp_f32 v0, v2, v3
35 ; GFX11-NEXT: s_setpc_b64 s[30:31]
36 %result = call float @llvm.ldexp.f32.i32(float %a, i32 %b)
; <2 x float> ldexp with <2 x i32> exponents. The checks expect the vector
; operation to be split into one v_ldexp_f32 per element on every target.
40 define <2 x float> @test_ldexp_v2f32_v2i32(ptr addrspace(1) %out, <2 x float> %a, <2 x i32> %b) {
41 ; GFX6-LABEL: test_ldexp_v2f32_v2i32:
43 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44 ; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v4
45 ; GFX6-NEXT: v_ldexp_f32_e32 v1, v3, v5
46 ; GFX6-NEXT: s_setpc_b64 s[30:31]
48 ; GFX8-LABEL: test_ldexp_v2f32_v2i32:
50 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51 ; GFX8-NEXT: v_ldexp_f32 v0, v2, v4
52 ; GFX8-NEXT: v_ldexp_f32 v1, v3, v5
53 ; GFX8-NEXT: s_setpc_b64 s[30:31]
55 ; GFX9-LABEL: test_ldexp_v2f32_v2i32:
57 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58 ; GFX9-NEXT: v_ldexp_f32 v0, v2, v4
59 ; GFX9-NEXT: v_ldexp_f32 v1, v3, v5
60 ; GFX9-NEXT: s_setpc_b64 s[30:31]
62 ; GFX11-LABEL: test_ldexp_v2f32_v2i32:
64 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65 ; GFX11-NEXT: v_ldexp_f32 v0, v2, v4
66 ; GFX11-NEXT: v_ldexp_f32 v1, v3, v5
67 ; GFX11-NEXT: s_setpc_b64 s[30:31]
68 %result = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %a, <2 x i32> %b)
69 ret <2 x float> %result
; <3 x float> ldexp with <3 x i32> exponents (odd element count). The checks
; expect three independent v_ldexp_f32 instructions on every target.
72 define <3 x float> @test_ldexp_v3f32_v3i32(ptr addrspace(1) %out, <3 x float> %a, <3 x i32> %b) {
73 ; GFX6-LABEL: test_ldexp_v3f32_v3i32:
75 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v5
77 ; GFX6-NEXT: v_ldexp_f32_e32 v1, v3, v6
78 ; GFX6-NEXT: v_ldexp_f32_e32 v2, v4, v7
79 ; GFX6-NEXT: s_setpc_b64 s[30:31]
81 ; GFX8-LABEL: test_ldexp_v3f32_v3i32:
83 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84 ; GFX8-NEXT: v_ldexp_f32 v0, v2, v5
85 ; GFX8-NEXT: v_ldexp_f32 v1, v3, v6
86 ; GFX8-NEXT: v_ldexp_f32 v2, v4, v7
87 ; GFX8-NEXT: s_setpc_b64 s[30:31]
89 ; GFX9-LABEL: test_ldexp_v3f32_v3i32:
91 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92 ; GFX9-NEXT: v_ldexp_f32 v0, v2, v5
93 ; GFX9-NEXT: v_ldexp_f32 v1, v3, v6
94 ; GFX9-NEXT: v_ldexp_f32 v2, v4, v7
95 ; GFX9-NEXT: s_setpc_b64 s[30:31]
97 ; GFX11-LABEL: test_ldexp_v3f32_v3i32:
99 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100 ; GFX11-NEXT: v_ldexp_f32 v0, v2, v5
101 ; GFX11-NEXT: v_ldexp_f32 v1, v3, v6
102 ; GFX11-NEXT: v_ldexp_f32 v2, v4, v7
103 ; GFX11-NEXT: s_setpc_b64 s[30:31]
104 %result = call <3 x float> @llvm.ldexp.v3f32.v3i32(<3 x float> %a, <3 x i32> %b)
105 ret <3 x float> %result
; <4 x float> ldexp with <4 x i32> exponents. The checks expect four
; independent v_ldexp_f32 instructions on every target.
108 define <4 x float> @test_ldexp_v4f32_v4i32(ptr addrspace(1) %out, <4 x float> %a, <4 x i32> %b) {
109 ; GFX6-LABEL: test_ldexp_v4f32_v4i32:
111 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112 ; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v6
113 ; GFX6-NEXT: v_ldexp_f32_e32 v1, v3, v7
114 ; GFX6-NEXT: v_ldexp_f32_e32 v2, v4, v8
115 ; GFX6-NEXT: v_ldexp_f32_e32 v3, v5, v9
116 ; GFX6-NEXT: s_setpc_b64 s[30:31]
118 ; GFX8-LABEL: test_ldexp_v4f32_v4i32:
120 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121 ; GFX8-NEXT: v_ldexp_f32 v0, v2, v6
122 ; GFX8-NEXT: v_ldexp_f32 v1, v3, v7
123 ; GFX8-NEXT: v_ldexp_f32 v2, v4, v8
124 ; GFX8-NEXT: v_ldexp_f32 v3, v5, v9
125 ; GFX8-NEXT: s_setpc_b64 s[30:31]
127 ; GFX9-LABEL: test_ldexp_v4f32_v4i32:
129 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130 ; GFX9-NEXT: v_ldexp_f32 v0, v2, v6
131 ; GFX9-NEXT: v_ldexp_f32 v1, v3, v7
132 ; GFX9-NEXT: v_ldexp_f32 v2, v4, v8
133 ; GFX9-NEXT: v_ldexp_f32 v3, v5, v9
134 ; GFX9-NEXT: s_setpc_b64 s[30:31]
136 ; GFX11-LABEL: test_ldexp_v4f32_v4i32:
138 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139 ; GFX11-NEXT: v_ldexp_f32 v0, v2, v6
140 ; GFX11-NEXT: v_ldexp_f32 v1, v3, v7
141 ; GFX11-NEXT: v_ldexp_f32 v2, v4, v8
142 ; GFX11-NEXT: v_ldexp_f32 v3, v5, v9
143 ; GFX11-NEXT: s_setpc_b64 s[30:31]
144 %result = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %a, <4 x i32> %b)
145 ret <4 x float> %result
; Scalar f64 ldexp with an i32 exponent. All run lines share the GCN prefix, so
; every subtarget and both selectors are expected to emit the same single
; v_ldexp_f64.
148 define double @test_ldexp_f64_i32(double %a, i32 %b) {
149 ; GCN-LABEL: test_ldexp_f64_i32:
151 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152 ; GCN-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2
153 ; GCN-NEXT: s_setpc_b64 s[30:31]
154 %result = call double @llvm.ldexp.f64.i32(double %a, i32 %b)
; <2 x double> ldexp with <2 x i32> exponents. Checked under the common GCN
; prefix: one v_ldexp_f64 per element, identical for all run lines.
158 define <2 x double> @test_ldexp_v2f64_v2i32(<2 x double> %a, <2 x i32> %b) {
159 ; GCN-LABEL: test_ldexp_v2f64_v2i32:
161 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162 ; GCN-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4
163 ; GCN-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5
164 ; GCN-NEXT: s_setpc_b64 s[30:31]
165 %result = call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> %a, <2 x i32> %b)
166 ret <2 x double> %result
170 ; define float @test_ldexp_f32_i16(float %a, i16 %b) {
171 ; %result = call float @llvm.ldexp.f32.i16(float %a, i16 %b)
175 ; FIXME: Should be able to truncate to i32
176 ; define float @test_ldexp_f32_i64(float %a, i64 %b) {
177 ; %result = call float @llvm.ldexp.f32.i64(float %a, i64 %b)
181 ; define <2 x float> @test_ldexp_v2f32_v2i16(<2 x float> %a, <2 x i16> %b) {
182 ; %result = call <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float> %a, <2 x i16> %b)
183 ; ret <2 x float> %result
186 ; FIXME: Should be able to truncate to i32
187 ; define <2 x float> @test_ldexp_v2f32_v2i64(<2 x float> %a, <2 x i64> %b) {
188 ; %result = call <2 x float> @llvm.ldexp.v2f32.v2i64(<2 x float> %a, <2 x i64> %b)
189 ; ret <2 x float> %result
; f16 ldexp with an i8 exponent (narrower than the instruction operand).
;  - GFX6: checked as v_ldexp_f32 surrounded by f16<->f32 conversions, with the
;    exponent sign-extended from 8 bits by v_bfe_i32.
;  - GFX8/GFX9 SDAG: the sign extension is folded into an SDWA v_ldexp_f16
;    (sext(v1), src1_sel:BYTE_0); GFX11 SDAG uses an explicit v_bfe_i32.
;  - GlobalISel on GFX8/GFX9/GFX11: after the v_bfe_i32 the exponent is also
;    passed through v_med3_i32 with 0xffff8000 and 0x7fff before v_ldexp_f16.
192 define half @test_ldexp_f16_i8(half %a, i8 %b) {
193 ; GFX6-SDAG-LABEL: test_ldexp_f16_i8:
194 ; GFX6-SDAG: ; %bb.0:
195 ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196 ; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
197 ; GFX6-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
198 ; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
199 ; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
200 ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
202 ; GFX8-SDAG-LABEL: test_ldexp_f16_i8:
203 ; GFX8-SDAG: ; %bb.0:
204 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
205 ; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
206 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
208 ; GFX9-SDAG-LABEL: test_ldexp_f16_i8:
209 ; GFX9-SDAG: ; %bb.0:
210 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211 ; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
212 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
214 ; GFX11-SDAG-LABEL: test_ldexp_f16_i8:
215 ; GFX11-SDAG: ; %bb.0:
216 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217 ; GFX11-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
218 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
219 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
220 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
222 ; GFX6-GISEL-LABEL: test_ldexp_f16_i8:
223 ; GFX6-GISEL: ; %bb.0:
224 ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225 ; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
226 ; GFX6-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 8
227 ; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
228 ; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
229 ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
231 ; GFX8-GISEL-LABEL: test_ldexp_f16_i8:
232 ; GFX8-GISEL: ; %bb.0:
233 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
234 ; GFX8-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 8
235 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
236 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
237 ; GFX8-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3
238 ; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
239 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
241 ; GFX9-GISEL-LABEL: test_ldexp_f16_i8:
242 ; GFX9-GISEL: ; %bb.0:
243 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
244 ; GFX9-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 8
245 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
246 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
247 ; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3
248 ; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
249 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
251 ; GFX11-GISEL-LABEL: test_ldexp_f16_i8:
252 ; GFX11-GISEL: ; %bb.0:
253 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254 ; GFX11-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 8
255 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
256 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
257 ; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
258 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
259 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
260 %result = call half @llvm.ldexp.f16.i8(half %a, i8 %b)
; f16 ldexp with an i16 exponent. On GFX8, GFX9 and GFX11 both selectors share
; one check prefix and are expected to emit a bare v_ldexp_f16_e32 with no
; exponent adjustment. GFX6 is checked as v_ldexp_f32 with f16<->f32
; conversions and a 16-bit v_bfe_i32 sign extension of the exponent.
264 define half @test_ldexp_f16_i16(half %a, i16 %b) {
265 ; GFX6-SDAG-LABEL: test_ldexp_f16_i16:
266 ; GFX6-SDAG: ; %bb.0:
267 ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268 ; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
269 ; GFX6-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16
270 ; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
271 ; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
272 ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
274 ; GFX8-LABEL: test_ldexp_f16_i16:
276 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
277 ; GFX8-NEXT: v_ldexp_f16_e32 v0, v0, v1
278 ; GFX8-NEXT: s_setpc_b64 s[30:31]
280 ; GFX9-LABEL: test_ldexp_f16_i16:
282 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283 ; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1
284 ; GFX9-NEXT: s_setpc_b64 s[30:31]
286 ; GFX11-LABEL: test_ldexp_f16_i16:
288 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
289 ; GFX11-NEXT: v_ldexp_f16_e32 v0, v0, v1
290 ; GFX11-NEXT: s_setpc_b64 s[30:31]
292 ; GFX6-GISEL-LABEL: test_ldexp_f16_i16:
293 ; GFX6-GISEL: ; %bb.0:
294 ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295 ; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
296 ; GFX6-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 16
297 ; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
298 ; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
299 ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
300 %result = call half @llvm.ldexp.f16.i16(half %a, i16 %b)
; f16 ldexp with an i32 exponent (wider than the f16 instruction's exponent).
; On GFX8, GFX9 and GFX11 the checks expect the exponent to go through
; v_med3_i32 with a 0x8000 / 0xffff8000 bound and 0x7fff before v_ldexp_f16;
; SDAG materializes the low bound with s_movk_i32, GlobalISel with a VGPR move
; or an inline literal. GFX6 is checked as v_ldexp_f32 with f16<->f32
; conversions and no clamp.
304 define half @test_ldexp_f16_i32(half %a, i32 %b) {
305 ; GFX6-SDAG-LABEL: test_ldexp_f16_i32:
306 ; GFX6-SDAG: ; %bb.0:
307 ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308 ; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
309 ; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
310 ; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
311 ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
313 ; GFX8-SDAG-LABEL: test_ldexp_f16_i32:
314 ; GFX8-SDAG: ; %bb.0:
315 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
316 ; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000
317 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0x7fff
318 ; GFX8-SDAG-NEXT: v_med3_i32 v1, v1, s4, v2
319 ; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
320 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
322 ; GFX9-SDAG-LABEL: test_ldexp_f16_i32:
323 ; GFX9-SDAG: ; %bb.0:
324 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
325 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
326 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x7fff
327 ; GFX9-SDAG-NEXT: v_med3_i32 v1, v1, s4, v2
328 ; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
329 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
331 ; GFX11-SDAG-LABEL: test_ldexp_f16_i32:
332 ; GFX11-SDAG: ; %bb.0:
333 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
334 ; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
335 ; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
336 ; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
337 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
338 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
340 ; GFX6-GISEL-LABEL: test_ldexp_f16_i32:
341 ; GFX6-GISEL: ; %bb.0:
342 ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343 ; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
344 ; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
345 ; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
346 ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
348 ; GFX8-GISEL-LABEL: test_ldexp_f16_i32:
349 ; GFX8-GISEL: ; %bb.0:
350 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
352 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
353 ; GFX8-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3
354 ; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
355 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
357 ; GFX9-GISEL-LABEL: test_ldexp_f16_i32:
358 ; GFX9-GISEL: ; %bb.0:
359 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
360 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff8000
361 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
362 ; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v2, v3
363 ; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
364 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
366 ; GFX11-GISEL-LABEL: test_ldexp_f16_i32:
367 ; GFX11-GISEL: ; %bb.0:
368 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
370 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
371 ; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
372 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
373 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
374 %result = call half @llvm.ldexp.f16.i32(half %a, i32 %b)
; <2 x half> ldexp with <2 x i32> exponents. On GFX8, GFX9 and GFX11 the checks
; expect each lane's exponent to pass through v_med3_i32 before a per-lane
; v_ldexp_f16; the two f16 results are then recombined into one register
; (v_or_b32, v_pack_b32_f16 or v_lshl_or_b32, depending on target and
; selector). GFX6 is checked as two v_ldexp_f32 with f16<->f32 conversions.
378 define <2 x half> @test_ldexp_v2f16_v2i32(<2 x half> %a, <2 x i32> %b) {
379 ; GFX6-SDAG-LABEL: test_ldexp_v2f16_v2i32:
380 ; GFX6-SDAG: ; %bb.0:
381 ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
382 ; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
383 ; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
384 ; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
385 ; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
386 ; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2
387 ; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v3
388 ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
390 ; GFX8-SDAG-LABEL: test_ldexp_v2f16_v2i32:
391 ; GFX8-SDAG: ; %bb.0:
392 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
393 ; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000
394 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, 0x7fff
395 ; GFX8-SDAG-NEXT: v_med3_i32 v2, v2, s4, v3
396 ; GFX8-SDAG-NEXT: v_med3_i32 v1, v1, s4, v3
397 ; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
398 ; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
399 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
400 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
402 ; GFX9-SDAG-LABEL: test_ldexp_v2f16_v2i32:
403 ; GFX9-SDAG: ; %bb.0:
404 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
405 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
406 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x7fff
407 ; GFX9-SDAG-NEXT: v_med3_i32 v2, v2, s4, v3
408 ; GFX9-SDAG-NEXT: v_med3_i32 v1, v1, s4, v3
409 ; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
410 ; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
411 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
412 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
414 ; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i32:
415 ; GFX11-SDAG: ; %bb.0:
416 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
417 ; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
418 ; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0
419 ; GFX11-SDAG-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
420 ; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
421 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
422 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v3, v2
423 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
424 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
425 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
426 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
428 ; GFX6-GISEL-LABEL: test_ldexp_v2f16_v2i32:
429 ; GFX6-GISEL: ; %bb.0:
430 ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431 ; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
432 ; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
433 ; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2
434 ; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
435 ; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
436 ; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
437 ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
439 ; GFX8-GISEL-LABEL: test_ldexp_v2f16_v2i32:
440 ; GFX8-GISEL: ; %bb.0:
441 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
442 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000
443 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff
444 ; GFX8-GISEL-NEXT: v_med3_i32 v1, v1, v3, v4
445 ; GFX8-GISEL-NEXT: v_med3_i32 v2, v2, v3, v4
446 ; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1
447 ; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
448 ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
449 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
451 ; GFX9-GISEL-LABEL: test_ldexp_v2f16_v2i32:
452 ; GFX9-GISEL: ; %bb.0:
453 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
454 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xffff8000
455 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0x7fff
456 ; GFX9-GISEL-NEXT: v_med3_i32 v1, v1, v3, v4
457 ; GFX9-GISEL-NEXT: v_med3_i32 v2, v2, v3, v4
458 ; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v1, v0, v1
459 ; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
460 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
461 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
463 ; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i32:
464 ; GFX11-GISEL: ; %bb.0:
465 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
466 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
467 ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
468 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
469 ; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
470 ; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
471 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
472 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
473 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v2
474 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
475 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
476 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
477 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
478 %result = call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> %a, <2 x i32> %b)
479 ret <2 x half> %result
; <2 x half> ldexp with <2 x i16> exponents. Unlike the <2 x i32> exponent
; variant, no v_med3_i32 appears in the GFX8/GFX9/GFX11 checks: each half is fed
; to v_ldexp_f16 directly (SDWA WORD_1 selects on GFX8/GFX9, explicit 16-bit
; shifts on GFX11) and the results are recombined. GFX6 is checked as two
; v_ldexp_f32 with f16<->f32 conversions and 16-bit v_bfe_i32 sign extensions.
482 define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) {
483 ; GFX6-SDAG-LABEL: test_ldexp_v2f16_v2i16:
484 ; GFX6-SDAG: ; %bb.0:
485 ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
486 ; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
487 ; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
488 ; GFX6-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
489 ; GFX6-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
490 ; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
491 ; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
492 ; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2
493 ; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v3
494 ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
496 ; GFX8-SDAG-LABEL: test_ldexp_v2f16_v2i16:
497 ; GFX8-SDAG: ; %bb.0:
498 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499 ; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
500 ; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
501 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
502 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
504 ; GFX9-SDAG-LABEL: test_ldexp_v2f16_v2i16:
505 ; GFX9-SDAG: ; %bb.0:
506 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
507 ; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
508 ; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
509 ; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
510 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
512 ; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i16:
513 ; GFX11-SDAG: ; %bb.0:
514 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
515 ; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1
516 ; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0
517 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
518 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
519 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v3, v2
520 ; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
521 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
523 ; GFX6-GISEL-LABEL: test_ldexp_v2f16_v2i16:
524 ; GFX6-GISEL: ; %bb.0:
525 ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
526 ; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
527 ; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
528 ; GFX6-GISEL-NEXT: v_bfe_i32 v2, v2, 0, 16
529 ; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v2
530 ; GFX6-GISEL-NEXT: v_bfe_i32 v2, v3, 0, 16
531 ; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2
532 ; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
533 ; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
534 ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
536 ; GFX8-GISEL-LABEL: test_ldexp_v2f16_v2i16:
537 ; GFX8-GISEL: ; %bb.0:
538 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
539 ; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v2, v0, v1
540 ; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
541 ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
542 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
544 ; GFX9-GISEL-LABEL: test_ldexp_v2f16_v2i16:
545 ; GFX9-GISEL: ; %bb.0:
546 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
547 ; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v2, v0, v1
548 ; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
549 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
550 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
552 ; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i16:
553 ; GFX11-GISEL: ; %bb.0:
554 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
555 ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
556 ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
557 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
558 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
559 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v2, v3
560 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
561 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
562 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
563 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
564 %result = call <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half> %a, <2 x i16> %b)
565 ret <2 x half> %result
; Declarations of every llvm.ldexp overload named in this file. The f32.i16,
; f32.i64, v2f32.v2i16 and v2f32.v2i64 overloads are only referenced by the
; commented-out tests, so they are currently declared but not called.
; All declarations share attribute group #0.
568 declare float @llvm.ldexp.f32.i32(float, i32) #0
569 declare float @llvm.ldexp.f32.i16(float, i16) #0
570 declare float @llvm.ldexp.f32.i64(float, i64) #0
571 declare half @llvm.ldexp.f16.i8(half, i8) #0
572 declare half @llvm.ldexp.f16.i16(half, i16) #0
573 declare half @llvm.ldexp.f16.i32(half, i32) #0
574 declare <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half>, <2 x i16>) #0
575 declare <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half>, <2 x i32>) #0
576 declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>) #0
577 declare <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float>, <2 x i16>) #0
578 declare <2 x float> @llvm.ldexp.v2f32.v2i64(<2 x float>, <2 x i64>) #0
579 declare <3 x float> @llvm.ldexp.v3f32.v3i32(<3 x float>, <3 x i32>) #0
580 declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>) #0
581 declare double @llvm.ldexp.f64.i32(double, i32) #0
582 declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>) #0
584 attributes #0 = { nounwind readnone }