1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
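2 ; FIXME: Enable f16 promotion; the tahiti (GFX6) invocations below are spelled XUN so lit skips them (presumably until promotion works).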
3 ; XUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-SDAG %s
4 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
5 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
6 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s
8 ; XUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-GISEL %s
9 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
10 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
11 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s
13 ; define half @test_ldexp_f16_i16(ptr addrspace(1) %out, half %a, i16 %b) #0 {
14 ; %result = call half @llvm.experimental.constrained.ldexp.f16.i16(half %a, i16 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
; Scalar case: strictfp function that calls the constrained ldexp intrinsic on a
; half value with an i32 exponent, using dynamic rounding and strict exception
; metadata. %out is not referenced by the visible body.
; Every expected sequence below takes v_med3_i32 of the exponent with
; 0x8000 / 0xffff8000 and 0x7fff, then issues a single v_ldexp_f16.
18 define half @test_ldexp_f16_i32(ptr addrspace(1) %out, half %a, i32 %b) #0 {
19 ; GFX8-SDAG-LABEL: test_ldexp_f16_i32:
21 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22 ; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000
23 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0x7fff
24 ; GFX8-SDAG-NEXT: v_med3_i32 v0, v3, s4, v0
25 ; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0
26 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
28 ; GFX9-SDAG-LABEL: test_ldexp_f16_i32:
30 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
32 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x7fff
33 ; GFX9-SDAG-NEXT: v_med3_i32 v0, v3, s4, v0
34 ; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0
35 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
37 ; GFX11-SDAG-LABEL: test_ldexp_f16_i32:
38 ; GFX11-SDAG: ; %bb.0:
39 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40 ; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
41 ; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
42 ; GFX11-SDAG-NEXT: v_med3_i32 v0, v3, s0, 0x7fff
43 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0
44 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
46 ; GFX8-GISEL-LABEL: test_ldexp_f16_i32:
47 ; GFX8-GISEL: ; %bb.0:
48 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000
50 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff
51 ; GFX8-GISEL-NEXT: v_med3_i32 v0, v3, v0, v1
52 ; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
53 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
55 ; GFX9-GISEL-LABEL: test_ldexp_f16_i32:
56 ; GFX9-GISEL: ; %bb.0:
57 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000
59 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff
60 ; GFX9-GISEL-NEXT: v_med3_i32 v0, v3, v0, v1
61 ; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
62 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
64 ; GFX11-GISEL-LABEL: test_ldexp_f16_i32:
65 ; GFX11-GISEL: ; %bb.0:
66 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff
68 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
69 ; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v3, v0
70 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v2, v0
71 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
72 %result = call half @llvm.experimental.constrained.ldexp.f16.i32(half %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
76 ; define <2 x half> @test_ldexp_v2f16_v2i16(ptr addrspace(1) %out, <2 x half> %a, <2 x i16> %b) #0 {
77 ; %result = call <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i16(<2 x half> %a, <2 x i16> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
78 ; ret <2 x half> %result
; Two-element vector case: strictfp function that calls the constrained ldexp
; intrinsic on <2 x half> with <2 x i32> exponents (dynamic rounding, strict
; exceptions). %out is not referenced by the visible body.
; The expected code handles each lane on its own (one v_med3_i32 and one
; v_ldexp_f16 per lane) and then combines the two results into v0, using
; v_or_b32, v_perm_b32 or v_lshl_or_b32 depending on the target and selector.
81 define <2 x half> @test_ldexp_v2f16_v2i32(ptr addrspace(1) %out, <2 x half> %a, <2 x i32> %b) #0 {
82 ; GFX8-SDAG-LABEL: test_ldexp_v2f16_v2i32:
84 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000
86 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0x7fff
87 ; GFX8-SDAG-NEXT: v_med3_i32 v1, v3, s4, v0
88 ; GFX8-SDAG-NEXT: v_med3_i32 v0, v4, s4, v0
89 ; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v1, v2, v1
90 ; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
91 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
92 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
94 ; GFX9-SDAG-LABEL: test_ldexp_v2f16_v2i32:
96 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
98 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x7fff
99 ; GFX9-SDAG-NEXT: v_med3_i32 v1, v3, s4, v0
100 ; GFX9-SDAG-NEXT: v_med3_i32 v0, v4, s4, v0
101 ; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v1, v2, v1
102 ; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
103 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x5040100
104 ; GFX9-SDAG-NEXT: v_perm_b32 v0, v0, v1, s4
105 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
107 ; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i32:
108 ; GFX11-SDAG: ; %bb.0:
109 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110 ; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
111 ; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
112 ; GFX11-SDAG-NEXT: v_med3_i32 v0, v3, s0, 0x7fff
113 ; GFX11-SDAG-NEXT: v_med3_i32 v1, v4, s0, 0x7fff
114 ; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v2
115 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0
116 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
117 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v3, v1
118 ; GFX11-SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
119 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
121 ; GFX8-GISEL-LABEL: test_ldexp_v2f16_v2i32:
122 ; GFX8-GISEL: ; %bb.0:
123 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000
125 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff
126 ; GFX8-GISEL-NEXT: v_med3_i32 v3, v3, v0, v1
127 ; GFX8-GISEL-NEXT: v_med3_i32 v0, v4, v0, v1
128 ; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v3, v2, v3
129 ; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
130 ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v3, v0
131 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
133 ; GFX9-GISEL-LABEL: test_ldexp_v2f16_v2i32:
134 ; GFX9-GISEL: ; %bb.0:
135 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000
137 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff
138 ; GFX9-GISEL-NEXT: v_med3_i32 v3, v3, v0, v1
139 ; GFX9-GISEL-NEXT: v_med3_i32 v0, v4, v0, v1
140 ; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v3, v2, v3
141 ; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
142 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v3
143 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
145 ; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i32:
146 ; GFX11-GISEL: ; %bb.0:
147 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff
149 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
150 ; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v3, v0
151 ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
152 ; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v4, v0
153 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v2, v1
154 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
155 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v3, v0
156 ; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
157 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
158 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
159 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
160 %result = call <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i32(<2 x half> %a, <2 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
161 ret <2 x half> %result
; Three-element vector case: strictfp function that calls the constrained ldexp
; intrinsic on <3 x half> with <3 x i32> exponents (dynamic rounding, strict
; exceptions). %out is not referenced by the visible body.
; The expected code issues three v_med3_i32 / v_ldexp_f16 pairs: the first two
; results are combined into v0 and the third result is produced in v1.
164 define <3 x half> @test_ldexp_v3f16_v3i32(ptr addrspace(1) %out, <3 x half> %a, <3 x i32> %b) #0 {
165 ; GFX8-SDAG-LABEL: test_ldexp_v3f16_v3i32:
166 ; GFX8-SDAG: ; %bb.0:
167 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168 ; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000
169 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, 0x7fff
170 ; GFX8-SDAG-NEXT: v_med3_i32 v0, v4, s4, v1
171 ; GFX8-SDAG-NEXT: v_med3_i32 v4, v5, s4, v1
172 ; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0
173 ; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
174 ; GFX8-SDAG-NEXT: v_med3_i32 v1, v6, s4, v1
175 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
176 ; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v1, v3, v1
177 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
179 ; GFX9-SDAG-LABEL: test_ldexp_v3f16_v3i32:
180 ; GFX9-SDAG: ; %bb.0:
181 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
183 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x7fff
184 ; GFX9-SDAG-NEXT: v_med3_i32 v0, v4, s4, v1
185 ; GFX9-SDAG-NEXT: v_med3_i32 v4, v5, s4, v1
186 ; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0
187 ; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
188 ; GFX9-SDAG-NEXT: s_mov_b32 s5, 0x5040100
189 ; GFX9-SDAG-NEXT: v_med3_i32 v1, v6, s4, v1
190 ; GFX9-SDAG-NEXT: v_perm_b32 v0, v2, v0, s5
191 ; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v1, v3, v1
192 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
194 ; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i32:
195 ; GFX11-SDAG: ; %bb.0:
196 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197 ; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
198 ; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
199 ; GFX11-SDAG-NEXT: v_med3_i32 v0, v4, s0, 0x7fff
200 ; GFX11-SDAG-NEXT: v_med3_i32 v1, v5, s0, 0x7fff
201 ; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v2
202 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0
203 ; GFX11-SDAG-NEXT: v_med3_i32 v2, v6, s0, 0x7fff
204 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
205 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v4, v1
206 ; GFX11-SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
207 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3)
208 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v3, v2
209 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
211 ; GFX8-GISEL-LABEL: test_ldexp_v3f16_v3i32:
212 ; GFX8-GISEL: ; %bb.0:
213 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000
215 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff
216 ; GFX8-GISEL-NEXT: v_med3_i32 v4, v4, v0, v1
217 ; GFX8-GISEL-NEXT: v_med3_i32 v5, v5, v0, v1
218 ; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v4, v2, v4
219 ; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
220 ; GFX8-GISEL-NEXT: v_med3_i32 v0, v6, v0, v1
221 ; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v1, v3, v0
222 ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v2
223 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
225 ; GFX9-GISEL-LABEL: test_ldexp_v3f16_v3i32:
226 ; GFX9-GISEL: ; %bb.0:
227 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000
229 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff
230 ; GFX9-GISEL-NEXT: v_med3_i32 v4, v4, v0, v1
231 ; GFX9-GISEL-NEXT: v_med3_i32 v5, v5, v0, v1
232 ; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v4, v2, v4
233 ; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
234 ; GFX9-GISEL-NEXT: v_med3_i32 v0, v6, v0, v1
235 ; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v1, v3, v0
236 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v4
237 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
239 ; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i32:
240 ; GFX11-GISEL: ; %bb.0:
241 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff
243 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
244 ; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v4, v0
245 ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
246 ; GFX11-GISEL-NEXT: v_med3_i32 v5, 0xffff8000, v5, v0
247 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v2, v1
248 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
249 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v4, v5
250 ; GFX11-GISEL-NEXT: v_med3_i32 v4, 0xffff8000, v6, v0
251 ; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
252 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
253 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v1
254 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v3, v4
255 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
256 %result = call <3 x half> @llvm.experimental.constrained.ldexp.v3f16.v3i32(<3 x half> %a, <3 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
257 ret <3 x half> %result
; Four-element vector case: strictfp function that calls the constrained ldexp
; intrinsic on <4 x half> with <4 x i32> exponents (dynamic rounding, strict
; exceptions). %out is not referenced by the visible body.
; The expected code issues four v_med3_i32 / v_ldexp_f16 pairs and combines the
; results pairwise into v0 and v1.
260 define <4 x half> @test_ldexp_v4f16_v4i32(ptr addrspace(1) %out, <4 x half> %a, <4 x i32> %b) #0 {
261 ; GFX8-SDAG-LABEL: test_ldexp_v4f16_v4i32:
262 ; GFX8-SDAG: ; %bb.0:
263 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264 ; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000
265 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0x7fff
266 ; GFX8-SDAG-NEXT: v_med3_i32 v1, v7, s4, v0
267 ; GFX8-SDAG-NEXT: v_med3_i32 v6, v6, s4, v0
268 ; GFX8-SDAG-NEXT: v_med3_i32 v5, v5, s4, v0
269 ; GFX8-SDAG-NEXT: v_med3_i32 v0, v4, s4, v0
270 ; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
271 ; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v3, v3, v6
272 ; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v5, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
273 ; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v0
274 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v5
275 ; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v3, v1
276 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
278 ; GFX9-SDAG-LABEL: test_ldexp_v4f16_v4i32:
279 ; GFX9-SDAG: ; %bb.0:
280 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281 ; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
282 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x7fff
283 ; GFX9-SDAG-NEXT: v_med3_i32 v1, v6, s4, v0
284 ; GFX9-SDAG-NEXT: v_med3_i32 v6, v7, s4, v0
285 ; GFX9-SDAG-NEXT: v_med3_i32 v4, v4, s4, v0
286 ; GFX9-SDAG-NEXT: v_med3_i32 v0, v5, s4, v0
287 ; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v1, v3, v1
288 ; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
289 ; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v4, v2, v4
290 ; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
291 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x5040100
292 ; GFX9-SDAG-NEXT: v_perm_b32 v0, v0, v4, s4
293 ; GFX9-SDAG-NEXT: v_perm_b32 v1, v3, v1, s4
294 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
296 ; GFX11-SDAG-LABEL: test_ldexp_v4f16_v4i32:
297 ; GFX11-SDAG: ; %bb.0:
298 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299 ; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
300 ; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
301 ; GFX11-SDAG-NEXT: v_med3_i32 v0, v6, s0, 0x7fff
302 ; GFX11-SDAG-NEXT: v_med3_i32 v1, v7, s0, 0x7fff
303 ; GFX11-SDAG-NEXT: v_med3_i32 v4, v4, s0, 0x7fff
304 ; GFX11-SDAG-NEXT: v_med3_i32 v5, v5, s0, 0x7fff
305 ; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v2
306 ; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v7, 16, v3
307 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v3, v3, v0
308 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v2, v4
309 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
310 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v6, v5
311 ; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v7, v1
312 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
313 ; GFX11-SDAG-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
314 ; GFX11-SDAG-NEXT: v_perm_b32 v1, v1, v3, 0x5040100
315 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
317 ; GFX8-GISEL-LABEL: test_ldexp_v4f16_v4i32:
318 ; GFX8-GISEL: ; %bb.0:
319 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
320 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000
321 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff
322 ; GFX8-GISEL-NEXT: v_med3_i32 v4, v4, v0, v1
323 ; GFX8-GISEL-NEXT: v_med3_i32 v5, v5, v0, v1
324 ; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v4, v2, v4
325 ; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
326 ; GFX8-GISEL-NEXT: v_med3_i32 v5, v6, v0, v1
327 ; GFX8-GISEL-NEXT: v_med3_i32 v0, v7, v0, v1
328 ; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v5, v3, v5
329 ; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v1, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
330 ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v2
331 ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v5, v1
332 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
334 ; GFX9-GISEL-LABEL: test_ldexp_v4f16_v4i32:
335 ; GFX9-GISEL: ; %bb.0:
336 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
337 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0xffff8000
338 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0x7fff
339 ; GFX9-GISEL-NEXT: v_med3_i32 v4, v4, v0, v1
340 ; GFX9-GISEL-NEXT: v_med3_i32 v5, v5, v0, v1
341 ; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v4, v2, v4
342 ; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
343 ; GFX9-GISEL-NEXT: v_med3_i32 v5, v6, v0, v1
344 ; GFX9-GISEL-NEXT: v_med3_i32 v0, v7, v0, v1
345 ; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v5, v3, v5
346 ; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
347 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v4
348 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v1, 16, v5
349 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
351 ; GFX11-GISEL-LABEL: test_ldexp_v4f16_v4i32:
352 ; GFX11-GISEL: ; %bb.0:
353 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
354 ; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7fff
355 ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v2
356 ; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v3
357 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
358 ; GFX11-GISEL-NEXT: v_med3_i32 v4, 0xffff8000, v4, v0
359 ; GFX11-GISEL-NEXT: v_med3_i32 v6, 0xffff8000, v6, v0
360 ; GFX11-GISEL-NEXT: v_med3_i32 v5, 0xffff8000, v5, v0
361 ; GFX11-GISEL-NEXT: v_med3_i32 v0, 0xffff8000, v7, v0
362 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v2, v4
363 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
364 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v3, v3, v6
365 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v5
366 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
367 ; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v4, v8, v0
368 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v2
369 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
370 ; GFX11-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3
371 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
372 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
373 ; GFX11-GISEL-NEXT: v_lshl_or_b32 v1, v4, 16, v2
374 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
375 %result = call <4 x half> @llvm.experimental.constrained.ldexp.v4f16.v4i32(<4 x half> %a, <4 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
376 ret <4 x half> %result
; Declarations of the constrained ldexp intrinsic overloads used by this file.
; Each takes the value, the exponent, and two metadata operands (rounding mode
; and exception behavior). In the visible portion of the file the f16.i16 and
; v2f16.v2i16 overloads are referenced only by the commented-out i16 tests.
379 declare half @llvm.experimental.constrained.ldexp.f16.i16(half, i16, metadata, metadata) #1
380 declare half @llvm.experimental.constrained.ldexp.f16.i32(half, i32, metadata, metadata) #1
381 declare <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i16(<2 x half>, <2 x i16>, metadata, metadata) #1
382 declare <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i32(<2 x half>, <2 x i32>, metadata, metadata) #1
383 declare <3 x half> @llvm.experimental.constrained.ldexp.v3f16.v3i32(<3 x half>, <3 x i32>, metadata, metadata) #1
384 declare <4 x half> @llvm.experimental.constrained.ldexp.v4f16.v4i32(<4 x half>, <4 x i32>, metadata, metadata) #1
; Attribute group #0 (strictfp) is attached to every test function defined in
; this file; group #1 is attached to the intrinsic declarations.
386 attributes #0 = { strictfp }
387 attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
388 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: