1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; Test no legal f16. Should just keep the cast to f32 and use the f32 med3.
3 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-SDAG %s
4 ; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-GISEL %s
6 ; Test legal f16, no f16 fmed3. Should expand to min/max sequence
7 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
8 ; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
10 ; Legal f16 med3. InstCombine ought to shrink the f32 op to f16 so the codegen doesn't really matter for this.
11 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
12 ; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
15 declare float @llvm.amdgcn.fmed3.f32(float, float, float) #0
16 declare float @llvm.fabs.f32(float) #0
17 declare half @llvm.fabs.f16(half) #0
; Base case: all three fmed3.f32 operands are fpext'd from half and the single
; use of the result is an fptrunc back to half. The GFX8 checks expect an f16
; min/max sequence; the GFX7 and GFX9 checks keep v_med3_f32.
19 define half @fmed3_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
20 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16:
22 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
24 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
25 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
26 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
28 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16:
29 ; GFX7-GISEL: ; %bb.0:
30 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
32 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
33 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
34 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
35 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
36 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
38 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16:
40 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41 ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v0, v1
42 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
43 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
44 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
45 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
47 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16:
48 ; GFX8-GISEL: ; %bb.0:
49 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50 ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1
51 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
52 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2
53 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
54 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
56 ; GFX9-LABEL: fmed3_f32_fpext_f16:
58 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
60 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
61 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
62 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
63 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
64 ; GFX9-NEXT: s_setpc_b64 s[30:31]
65 %arg0.ext = fpext half %arg0 to float
66 %arg1.ext = fpext half %arg1 to float
67 %arg2.ext = fpext half %arg2 to float
68 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
69 %cast = fptrunc float %med3 to half
; Same pattern as the base case, but the fmed3 call carries the nsz fast-math
; flag. The expected instruction sequences match the unflagged variant on
; every target checked here.
73 define half @fmed3_f32_fpext_f16_flags(half %arg0, half %arg1, half %arg2) #1 {
74 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_flags:
76 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
78 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
79 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
80 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
82 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_flags:
83 ; GFX7-GISEL: ; %bb.0:
84 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
86 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
87 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
88 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
89 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
90 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
92 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_flags:
94 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95 ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v0, v1
96 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
97 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
98 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
99 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
101 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_flags:
102 ; GFX8-GISEL: ; %bb.0:
103 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104 ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1
105 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
106 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2
107 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
108 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
110 ; GFX9-LABEL: fmed3_f32_fpext_f16_flags:
112 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
114 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
115 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
116 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
117 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
118 ; GFX9-NEXT: s_setpc_b64 s[30:31]
119 %arg0.ext = fpext half %arg0 to float
120 %arg1.ext = fpext half %arg1 to float
121 %arg2.ext = fpext half %arg2 to float
122 %med3 = call nsz float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
123 %cast = fptrunc float %med3 to half
; The f32 fmed3 result has two uses: it is stored as a float through %ptr and
; also fptrunc'd to half. The GFX8 checks keep v_med3_f32 on the extended
; operands and store its f32 result; no f16 min/max sequence is expected.
127 define half @fmed3_f32_fpext_f16_multi_use(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
128 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use:
129 ; GFX7-SDAG: ; %bb.0:
130 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
131 ; GFX7-SDAG-NEXT: v_med3_f32 v1, v0, v1, v2
132 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v1
133 ; GFX7-SDAG-NEXT: flat_store_dword v[3:4], v1
134 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
135 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
136 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
138 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use:
139 ; GFX7-GISEL: ; %bb.0:
140 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
142 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
143 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
144 ; GFX7-GISEL-NEXT: v_med3_f32 v1, v0, v1, v2
145 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
146 ; GFX7-GISEL-NEXT: flat_store_dword v[3:4], v1
147 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
148 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
150 ; GFX8-LABEL: fmed3_f32_fpext_f16_multi_use:
152 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153 ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
154 ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
155 ; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
156 ; GFX8-NEXT: v_med3_f32 v1, v0, v1, v2
157 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v1
158 ; GFX8-NEXT: flat_store_dword v[3:4], v1
159 ; GFX8-NEXT: s_waitcnt vmcnt(0)
160 ; GFX8-NEXT: s_setpc_b64 s[30:31]
162 ; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use:
164 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
166 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
167 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
168 ; GFX9-NEXT: v_med3_f32 v1, v0, v1, v2
169 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v1
170 ; GFX9-NEXT: global_store_dword v[3:4], v1, off
171 ; GFX9-NEXT: s_waitcnt vmcnt(0)
172 ; GFX9-NEXT: s_setpc_b64 s[30:31]
173 %arg0.ext = fpext half %arg0 to float
174 %arg1.ext = fpext half %arg1 to float
175 %arg2.ext = fpext half %arg2 to float
176 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
177 store float %med3, ptr addrspace(1) %ptr
178 %cast = fptrunc float %med3 to half
; Constant in operand 0: fmed3.f32(2.0, fpext %arg1, fpext %arg2) truncated to
; half. The GFX8 checks expect the f16 min/max sequence with 2.0 used as an
; inline operand.
182 define half @fmed3_f32_fpext_f16_k0(half %arg1, half %arg2) #1 {
183 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k0:
184 ; GFX7-SDAG: ; %bb.0:
185 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186 ; GFX7-SDAG-NEXT: v_med3_f32 v0, 2.0, v0, v1
187 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
188 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
189 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
191 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k0:
192 ; GFX7-GISEL: ; %bb.0:
193 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
195 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
196 ; GFX7-GISEL-NEXT: v_med3_f32 v0, 2.0, v0, v1
197 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
198 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
200 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_k0:
201 ; GFX8-SDAG: ; %bb.0:
202 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203 ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, 2.0, v0
204 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, 2.0, v0
205 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
206 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v2, v0
207 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
209 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_k0:
210 ; GFX8-GISEL: ; %bb.0:
211 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, 2.0, v0
213 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, 2.0, v0
214 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v2, v1
215 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
216 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
218 ; GFX9-LABEL: fmed3_f32_fpext_f16_k0:
220 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
221 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
222 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
223 ; GFX9-NEXT: v_med3_f32 v0, 2.0, v0, v1
224 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
225 ; GFX9-NEXT: s_setpc_b64 s[30:31]
226 %arg1.ext = fpext half %arg1 to float
227 %arg2.ext = fpext half %arg2 to float
228 %med3 = call float @llvm.amdgcn.fmed3.f32(float 2.0, float %arg1.ext, float %arg2.ext)
229 %cast = fptrunc float %med3 to half
; Constant in operand 1: fmed3.f32(fpext %arg0, 2.0, fpext %arg2) truncated to
; half. The GFX8 checks expect the f16 min/max sequence with 2.0 used as an
; inline operand.
233 define half @fmed3_f32_fpext_f16_k1(half %arg0, half %arg2) #1 {
234 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k1:
235 ; GFX7-SDAG: ; %bb.0:
236 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, 2.0, v1
238 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
239 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
240 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
242 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k1:
243 ; GFX7-GISEL: ; %bb.0:
244 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
246 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
247 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, 2.0, v1
248 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
249 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
251 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_k1:
252 ; GFX8-SDAG: ; %bb.0:
253 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254 ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, 2.0, v0
255 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, 2.0, v0
256 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
257 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v2, v0
258 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
260 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_k1:
261 ; GFX8-GISEL: ; %bb.0:
262 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263 ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, 2.0, v0
264 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, 2.0, v0
265 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v2, v1
266 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
267 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
269 ; GFX9-LABEL: fmed3_f32_fpext_f16_k1:
271 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
273 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
274 ; GFX9-NEXT: v_med3_f32 v0, v0, 2.0, v1
275 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
276 ; GFX9-NEXT: s_setpc_b64 s[30:31]
277 %arg0.ext = fpext half %arg0 to float
278 %arg2.ext = fpext half %arg2 to float
279 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float 2.0, float %arg2.ext)
280 %cast = fptrunc float %med3 to half
; Constant in operand 2: fmed3.f32(fpext %arg0, fpext %arg1, 2.0) truncated to
; half. The GFX8 checks expect the f16 min/max sequence with 2.0 used as an
; inline operand.
284 define half @fmed3_f32_fpext_f16_k2(half %arg0, half %arg1) #1 {
285 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k2:
286 ; GFX7-SDAG: ; %bb.0:
287 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
288 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, 2.0
289 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
290 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
291 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
293 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k2:
294 ; GFX7-GISEL: ; %bb.0:
295 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
297 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
298 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, 2.0
299 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
300 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
302 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_k2:
303 ; GFX8-SDAG: ; %bb.0:
304 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305 ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v0, v1
306 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
307 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, 2.0, v0
308 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v2, v0
309 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
311 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_k2:
312 ; GFX8-GISEL: ; %bb.0:
313 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314 ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v0, v1
315 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
316 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, 2.0, v2
317 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
318 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
320 ; GFX9-LABEL: fmed3_f32_fpext_f16_k2:
322 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
323 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
324 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
325 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, 2.0
326 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
327 ; GFX9-NEXT: s_setpc_b64 s[30:31]
328 %arg0.ext = fpext half %arg0 to float
329 %arg1.ext = fpext half %arg1 to float
330 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float 2.0)
331 %cast = fptrunc float %med3 to half
; Constants in operands 0 and 1: fmed3.f32(0.0, 16.0, fpext %arg2) truncated to
; half. 16.0 is materialized with v_mov_b32: as the f32 bit pattern 0x41800000
; in the GFX7/GFX9 checks and as the f16 bit pattern 0x4c00 in the GFX8 checks,
; which expect the f16 min/max sequence.
335 define half @fmed3_f32_fpext_f16_k0_k1(half %arg2) #1 {
336 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k1:
337 ; GFX7-SDAG: ; %bb.0:
338 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339 ; GFX7-SDAG-NEXT: v_mov_b32_e32 v1, 0x41800000
340 ; GFX7-SDAG-NEXT: v_med3_f32 v0, 0, v1, v0
341 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
342 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
343 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
345 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k0_k1:
346 ; GFX7-GISEL: ; %bb.0:
347 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
348 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
349 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, 0x41800000
350 ; GFX7-GISEL-NEXT: v_med3_f32 v0, 0, v1, v0
351 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
352 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
354 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k1:
355 ; GFX8-SDAG: ; %bb.0:
356 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, 0x4c00
358 ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, 0, v1
359 ; GFX8-SDAG-NEXT: v_min_f16_e32 v1, 0, v1
360 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v1, v0
361 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v2, v0
362 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
364 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_k0_k1:
365 ; GFX8-GISEL: ; %bb.0:
366 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
367 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0x4c00
368 ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, 0, v1
369 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, 0, v1
370 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v2, v0
371 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v1, v0
372 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
374 ; GFX9-LABEL: fmed3_f32_fpext_f16_k0_k1:
376 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
378 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x41800000
379 ; GFX9-NEXT: v_med3_f32 v0, 0, v1, v0
380 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
381 ; GFX9-NEXT: s_setpc_b64 s[30:31]
382 %arg2.ext = fpext half %arg2 to float
383 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 16.0, float %arg2.ext)
384 %cast = fptrunc float %med3 to half
; Constants in operands 0 and 2: fmed3.f32(0.0, fpext %arg1, 2.0) truncated to
; half. Both constants appear as inline operands in every expected sequence;
; the GFX8 checks expect the f16 min/max sequence.
388 define half @fmed3_f32_fpext_f16_k0_k2(half %arg1) #1 {
389 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k2:
390 ; GFX7-SDAG: ; %bb.0:
391 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
392 ; GFX7-SDAG-NEXT: v_med3_f32 v0, 0, v0, 2.0
393 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
394 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
395 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
397 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k0_k2:
398 ; GFX7-GISEL: ; %bb.0:
399 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
400 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
401 ; GFX7-GISEL-NEXT: v_med3_f32 v0, 0, v0, 2.0
402 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
403 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
405 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k2:
406 ; GFX8-SDAG: ; %bb.0:
407 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
408 ; GFX8-SDAG-NEXT: v_max_f16_e32 v1, 0, v0
409 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, 0, v0
410 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, 2.0, v0
411 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v1, v0
412 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
414 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_k0_k2:
415 ; GFX8-GISEL: ; %bb.0:
416 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
417 ; GFX8-GISEL-NEXT: v_min_f16_e32 v1, 0, v0
418 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, 0, v0
419 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, 2.0, v1
420 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
421 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
423 ; GFX9-LABEL: fmed3_f32_fpext_f16_k0_k2:
425 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
426 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
427 ; GFX9-NEXT: v_med3_f32 v0, 0, v0, 2.0
428 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
429 ; GFX9-NEXT: s_setpc_b64 s[30:31]
430 %arg1.ext = fpext half %arg1 to float
431 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %arg1.ext, float 2.0)
432 %cast = fptrunc float %med3 to half
; fabs is applied to each half operand BEFORE the fpext. The GFX8 checks expect
; the f16 min/max sequence with |x| source modifiers; the GFX7 and GFX9 checks
; put the |x| modifiers on the f16-to-f32 conversions feeding v_med3_f32.
436 define half @fmed3_f32_fpext_f16_fabs(half %arg0, half %arg1, half %arg2) #1 {
437 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fabs:
438 ; GFX7-SDAG: ; %bb.0:
439 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
440 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
441 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
442 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
443 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
444 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
445 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v2, |v2|
446 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
447 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
448 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
449 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
451 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fabs:
452 ; GFX7-GISEL: ; %bb.0:
453 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
454 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0|
455 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, |v1|
456 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v2, |v2|
457 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
458 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
459 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
461 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_fabs:
462 ; GFX8-SDAG: ; %bb.0:
463 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
464 ; GFX8-SDAG-NEXT: v_max_f16_e64 v3, |v0|, |v1|
465 ; GFX8-SDAG-NEXT: v_min_f16_e64 v0, |v0|, |v1|
466 ; GFX8-SDAG-NEXT: v_max_f16_e64 v0, v0, |v2|
467 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
468 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
470 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_fabs:
471 ; GFX8-GISEL: ; %bb.0:
472 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
473 ; GFX8-GISEL-NEXT: v_min_f16_e64 v3, |v0|, |v1|
474 ; GFX8-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v1|
475 ; GFX8-GISEL-NEXT: v_max_f16_e64 v1, v3, |v2|
476 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
477 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
479 ; GFX9-LABEL: fmed3_f32_fpext_f16_fabs:
481 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
482 ; GFX9-NEXT: v_cvt_f32_f16_e64 v0, |v0|
483 ; GFX9-NEXT: v_cvt_f32_f16_e64 v1, |v1|
484 ; GFX9-NEXT: v_cvt_f32_f16_e64 v2, |v2|
485 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
486 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
487 ; GFX9-NEXT: s_setpc_b64 s[30:31]
488 %fabs.arg0 = call half @llvm.fabs.f16(half %arg0)
489 %fabs.arg1 = call half @llvm.fabs.f16(half %arg1)
490 %fabs.arg2 = call half @llvm.fabs.f16(half %arg2)
491 %arg0.ext = fpext half %fabs.arg0 to float
492 %arg1.ext = fpext half %fabs.arg1 to float
493 %arg2.ext = fpext half %fabs.arg2 to float
494 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
495 %cast = fptrunc float %med3 to half
; fabs is applied to each operand AFTER the fpext, i.e. on the f32 value. All
; targets checked here, including GFX8, keep v_med3_f32 with |x| source
; modifiers; no f16 min/max sequence is expected.
499 define half @fmed3_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
500 ; GFX7-SDAG-LABEL: fmed3_fabs_f32_fpext_f16:
501 ; GFX7-SDAG: ; %bb.0:
502 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
503 ; GFX7-SDAG-NEXT: v_med3_f32 v0, |v0|, |v1|, |v2|
504 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
505 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
506 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
508 ; GFX7-GISEL-LABEL: fmed3_fabs_f32_fpext_f16:
509 ; GFX7-GISEL: ; %bb.0:
510 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
511 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
512 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
513 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
514 ; GFX7-GISEL-NEXT: v_med3_f32 v0, |v0|, |v1|, |v2|
515 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
516 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
518 ; GFX8-LABEL: fmed3_fabs_f32_fpext_f16:
520 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521 ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
522 ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
523 ; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
524 ; GFX8-NEXT: v_med3_f32 v0, |v0|, |v1|, |v2|
525 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
526 ; GFX8-NEXT: s_setpc_b64 s[30:31]
528 ; GFX9-LABEL: fmed3_fabs_f32_fpext_f16:
530 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
531 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
532 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
533 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
534 ; GFX9-NEXT: v_med3_f32 v0, |v0|, |v1|, |v2|
535 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
536 ; GFX9-NEXT: s_setpc_b64 s[30:31]
537 %arg0.ext = fpext half %arg0 to float
538 %arg1.ext = fpext half %arg1 to float
539 %arg2.ext = fpext half %arg2 to float
540 %fabs.ext.arg0 = call float @llvm.fabs.f32(float %arg0.ext)
541 %fabs.ext.arg1 = call float @llvm.fabs.f32(float %arg1.ext)
542 %fabs.ext.arg2 = call float @llvm.fabs.f32(float %arg2.ext)
543 %med3 = call float @llvm.amdgcn.fmed3.f32(float %fabs.ext.arg0, float %fabs.ext.arg1, float %fabs.ext.arg2)
544 %cast = fptrunc float %med3 to half
; fneg is applied to each half operand BEFORE the fpext. The GFX8 checks expect
; the f16 min/max sequence with negated source modifiers; the other targets
; keep v_med3_f32, with the negation either on its operands or on the
; preceding f16-to-f32 conversions.
548 define half @fmed3_f32_fpext_f16_fneg(half %arg0, half %arg1, half %arg2) #1 {
549 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fneg:
550 ; GFX7-SDAG: ; %bb.0:
551 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
552 ; GFX7-SDAG-NEXT: v_med3_f32 v0, -v0, -v1, -v2
553 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
554 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
555 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
557 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fneg:
558 ; GFX7-GISEL: ; %bb.0:
559 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
560 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
561 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -v1
562 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v2, -v2
563 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
564 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
565 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
567 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_fneg:
568 ; GFX8-SDAG: ; %bb.0:
569 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
570 ; GFX8-SDAG-NEXT: v_max_f16_e64 v3, -v0, -v1
571 ; GFX8-SDAG-NEXT: v_min_f16_e64 v0, -v0, -v1
572 ; GFX8-SDAG-NEXT: v_max_f16_e64 v0, v0, -v2
573 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
574 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
576 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_fneg:
577 ; GFX8-GISEL: ; %bb.0:
578 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
579 ; GFX8-GISEL-NEXT: v_min_f16_e64 v3, -v0, -v1
580 ; GFX8-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v1
581 ; GFX8-GISEL-NEXT: v_max_f16_e64 v1, v3, -v2
582 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
583 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
585 ; GFX9-LABEL: fmed3_f32_fpext_f16_fneg:
587 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
588 ; GFX9-NEXT: v_cvt_f32_f16_e64 v0, -v0
589 ; GFX9-NEXT: v_cvt_f32_f16_e64 v1, -v1
590 ; GFX9-NEXT: v_cvt_f32_f16_e64 v2, -v2
591 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
592 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
593 ; GFX9-NEXT: s_setpc_b64 s[30:31]
594 %fneg.arg0 = fneg half %arg0
595 %fneg.arg1 = fneg half %arg1
596 %fneg.arg2 = fneg half %arg2
597 %arg0.ext = fpext half %fneg.arg0 to float
598 %arg1.ext = fpext half %fneg.arg1 to float
599 %arg2.ext = fpext half %fneg.arg2 to float
600 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
601 %cast = fptrunc float %med3 to half
; fneg is applied to each operand AFTER the fpext, i.e. on the f32 value. All
; targets checked here, including GFX8, keep v_med3_f32 with negated source
; modifiers; no f16 min/max sequence is expected.
605 define half @fmed3_fneg_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
606 ; GFX7-SDAG-LABEL: fmed3_fneg_f32_fpext_f16:
607 ; GFX7-SDAG: ; %bb.0:
608 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
609 ; GFX7-SDAG-NEXT: v_med3_f32 v0, -v0, -v1, -v2
610 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
611 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
612 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
614 ; GFX7-GISEL-LABEL: fmed3_fneg_f32_fpext_f16:
615 ; GFX7-GISEL: ; %bb.0:
616 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
617 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
618 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
619 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
620 ; GFX7-GISEL-NEXT: v_med3_f32 v0, -v0, -v1, -v2
621 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
622 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
624 ; GFX8-LABEL: fmed3_fneg_f32_fpext_f16:
626 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
627 ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
628 ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
629 ; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
630 ; GFX8-NEXT: v_med3_f32 v0, -v0, -v1, -v2
631 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
632 ; GFX8-NEXT: s_setpc_b64 s[30:31]
634 ; GFX9-LABEL: fmed3_fneg_f32_fpext_f16:
636 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
637 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
638 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
639 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
640 ; GFX9-NEXT: v_med3_f32 v0, -v0, -v1, -v2
641 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
642 ; GFX9-NEXT: s_setpc_b64 s[30:31]
643 %arg0.ext = fpext half %arg0 to float
644 %arg1.ext = fpext half %arg1 to float
645 %arg2.ext = fpext half %arg2 to float
646 %fneg.ext.arg0 = fneg float %arg0.ext
647 %fneg.ext.arg1 = fneg float %arg1.ext
648 %fneg.ext.arg2 = fneg float %arg2.ext
649 %med3 = call float @llvm.amdgcn.fmed3.f32(float %fneg.ext.arg0, float %fneg.ext.arg1, float %fneg.ext.arg2)
650 %cast = fptrunc float %med3 to half
; fneg(fabs(x)) is applied to each half operand BEFORE the fpext. The GFX8
; checks expect the f16 min/max sequence with -|x| source modifiers; the other
; targets keep v_med3_f32 and split or fold the modifiers across the
; conversions and the med3 operands.
654 define half @fmed3_f32_fpext_f16_fneg_fabs(half %arg0, half %arg1, half %arg2) #1 {
655 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fneg_fabs:
656 ; GFX7-SDAG: ; %bb.0:
657 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
659 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
660 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
661 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
662 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
663 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v2, |v2|
664 ; GFX7-SDAG-NEXT: v_med3_f32 v0, -v0, -v1, -v2
665 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
666 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
667 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
669 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fneg_fabs:
670 ; GFX7-GISEL: ; %bb.0:
671 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
672 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
673 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -|v1|
674 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v2, -|v2|
675 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
676 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
677 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
679 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_fneg_fabs:
680 ; GFX8-SDAG: ; %bb.0:
681 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
682 ; GFX8-SDAG-NEXT: v_max_f16_e64 v3, -|v0|, -|v1|
683 ; GFX8-SDAG-NEXT: v_min_f16_e64 v0, -|v0|, -|v1|
684 ; GFX8-SDAG-NEXT: v_max_f16_e64 v0, v0, -|v2|
685 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
686 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
688 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_fneg_fabs:
689 ; GFX8-GISEL: ; %bb.0:
690 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691 ; GFX8-GISEL-NEXT: v_min_f16_e64 v3, -|v0|, -|v1|
692 ; GFX8-GISEL-NEXT: v_max_f16_e64 v0, -|v0|, -|v1|
693 ; GFX8-GISEL-NEXT: v_max_f16_e64 v1, v3, -|v2|
694 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
695 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
697 ; GFX9-LABEL: fmed3_f32_fpext_f16_fneg_fabs:
699 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
700 ; GFX9-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
701 ; GFX9-NEXT: v_cvt_f32_f16_e64 v1, -|v1|
702 ; GFX9-NEXT: v_cvt_f32_f16_e64 v2, -|v2|
703 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
704 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
705 ; GFX9-NEXT: s_setpc_b64 s[30:31]
706 %fabs.arg0 = call half @llvm.fabs.f16(half %arg0)
707 %fabs.arg1 = call half @llvm.fabs.f16(half %arg1)
708 %fabs.arg2 = call half @llvm.fabs.f16(half %arg2)
709 %fneg.fabs.arg0 = fneg half %fabs.arg0
710 %fneg.fabs.arg1 = fneg half %fabs.arg1
711 %fneg.fabs.arg2 = fneg half %fabs.arg2
712 %arg0.ext = fpext half %fneg.fabs.arg0 to float
713 %arg1.ext = fpext half %fneg.fabs.arg1 to float
714 %arg2.ext = fpext half %fneg.fabs.arg2 to float
715 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
716 %cast = fptrunc float %med3 to half
; fneg(fabs(x)) is applied to each operand AFTER the fpext, i.e. on the f32
; value. All targets checked here, including GFX8, keep v_med3_f32 with -|x|
; source modifiers; no f16 min/max sequence is expected.
720 define half @fmed3_fneg_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
721 ; GFX7-SDAG-LABEL: fmed3_fneg_fabs_f32_fpext_f16:
722 ; GFX7-SDAG: ; %bb.0:
723 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
724 ; GFX7-SDAG-NEXT: v_med3_f32 v0, -|v0|, -|v1|, -|v2|
725 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
726 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
727 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
729 ; GFX7-GISEL-LABEL: fmed3_fneg_fabs_f32_fpext_f16:
730 ; GFX7-GISEL: ; %bb.0:
731 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
732 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
733 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
734 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
735 ; GFX7-GISEL-NEXT: v_med3_f32 v0, -|v0|, -|v1|, -|v2|
736 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
737 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
739 ; GFX8-LABEL: fmed3_fneg_fabs_f32_fpext_f16:
741 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
742 ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
743 ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
744 ; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
745 ; GFX8-NEXT: v_med3_f32 v0, -|v0|, -|v1|, -|v2|
746 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
747 ; GFX8-NEXT: s_setpc_b64 s[30:31]
749 ; GFX9-LABEL: fmed3_fneg_fabs_f32_fpext_f16:
751 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
752 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
753 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
754 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
755 ; GFX9-NEXT: v_med3_f32 v0, -|v0|, -|v1|, -|v2|
756 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
757 ; GFX9-NEXT: s_setpc_b64 s[30:31]
758 %arg0.ext = fpext half %arg0 to float
759 %arg1.ext = fpext half %arg1 to float
760 %arg2.ext = fpext half %arg2 to float
761 %fabs.ext.arg0 = call float @llvm.fabs.f32(float %arg0.ext)
762 %fabs.ext.arg1 = call float @llvm.fabs.f32(float %arg1.ext)
763 %fabs.ext.arg2 = call float @llvm.fabs.f32(float %arg2.ext)
764 %fneg.fabs.ext.arg0 = fneg float %fabs.ext.arg0
765 %fneg.fabs.ext.arg1 = fneg float %fabs.ext.arg1
766 %fneg.fabs.ext.arg2 = fneg float %fabs.ext.arg2
767 %med3 = call float @llvm.amdgcn.fmed3.f32(float %fneg.fabs.ext.arg0, float %fneg.fabs.ext.arg1, float %fneg.fabs.ext.arg2)
768 %cast = fptrunc float %med3 to half
771 ; --------------------------------------------------------------------------------
773 ; --------------------------------------------------------------------------------
; Extends three half arguments to float, takes the f32 fmed3 of them, and
; truncates the result to bfloat instead of half. The checks below show the
; med3 being performed at f32 (v_med3_f32) on every tested target.
775 define bfloat @fmed3_f32_fpext_f16_fptrunc_bf16(half %arg0, half %arg1, half %arg2) #1 {
776 ; GFX7-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
778 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
779 ; GFX7-NEXT: v_med3_f32 v0, v0, v1, v2
780 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
781 ; GFX7-NEXT: s_setpc_b64 s[30:31]
783 ; GFX8-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
785 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
786 ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
787 ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
788 ; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
789 ; GFX8-NEXT: v_med3_f32 v0, v0, v1, v2
790 ; GFX8-NEXT: v_bfe_u32 v1, v0, 16, 1
791 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v0
792 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x7fff, v1
793 ; GFX8-NEXT: v_or_b32_e32 v2, 0x400000, v0
794 ; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
795 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
796 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
797 ; GFX8-NEXT: s_setpc_b64 s[30:31]
799 ; GFX9-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
801 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
802 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
803 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
804 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
805 ; GFX9-NEXT: s_movk_i32 s4, 0x7fff
806 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
807 ; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1
808 ; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4
809 ; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0
810 ; GFX9-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
811 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
812 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
813 ; GFX9-NEXT: s_setpc_b64 s[30:31]
814 %arg0.ext = fpext half %arg0 to float
815 %arg1.ext = fpext half %arg1 to float
816 %arg2.ext = fpext half %arg2 to float
817 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
818 %cast = fptrunc float %med3 to bfloat
; fpext(half) x3 -> fmed3.f32 -> fptrunc to half, where the extended first
; operand (%arg0.ext) has a second use: it is also stored to %ptr.
; In the checks below, GFX8 still emits the f16 min/max sequence and converts
; %arg0 separately for the store, while GFX9 keeps v_med3_f32.
822 define half @fmed3_f32_fpext_f16_multi_use_0(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
823 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_0:
824 ; GFX7-SDAG: ; %bb.0:
825 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
826 ; GFX7-SDAG-NEXT: v_med3_f32 v1, v0, v1, v2
827 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
828 ; GFX7-SDAG-NEXT: flat_store_dword v[3:4], v0
829 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
830 ; GFX7-SDAG-NEXT: v_mov_b32_e32 v0, v1
831 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
832 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
834 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_0:
835 ; GFX7-GISEL: ; %bb.0:
836 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
837 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v0
838 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v1
839 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v2
840 ; GFX7-GISEL-NEXT: flat_store_dword v[3:4], v5
841 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v5, v0, v1
842 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
843 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
844 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
846 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_0:
847 ; GFX8-SDAG: ; %bb.0:
848 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
849 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v0
850 ; GFX8-SDAG-NEXT: flat_store_dword v[3:4], v5
851 ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v0, v1
852 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
853 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
854 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
855 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0)
856 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
858 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_0:
859 ; GFX8-GISEL: ; %bb.0:
860 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
861 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v0
862 ; GFX8-GISEL-NEXT: flat_store_dword v[3:4], v5
863 ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1
864 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
865 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2
866 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
867 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
868 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
870 ; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use_0:
872 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
873 ; GFX9-NEXT: v_cvt_f32_f16_e32 v5, v0
874 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v1
875 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v2
876 ; GFX9-NEXT: global_store_dword v[3:4], v5, off
877 ; GFX9-NEXT: v_med3_f32 v0, v5, v0, v1
878 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
879 ; GFX9-NEXT: s_waitcnt vmcnt(0)
880 ; GFX9-NEXT: s_setpc_b64 s[30:31]
881 %arg0.ext = fpext half %arg0 to float
882 store float %arg0.ext, ptr addrspace(1) %ptr
883 %arg1.ext = fpext half %arg1 to float
884 %arg2.ext = fpext half %arg2 to float
885 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
886 %cast = fptrunc float %med3 to half
; fpext(half) x3 -> fmed3.f32 -> fptrunc to half, where the extended second
; operand (%arg1.ext) has a second use: it is also stored to %ptr.
; In the checks below, GFX8 still emits the f16 min/max sequence and converts
; %arg1 separately for the store, while GFX9 keeps v_med3_f32.
890 define half @fmed3_f32_fpext_f16_multi_use_1(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
891 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_1:
892 ; GFX7-SDAG: ; %bb.0:
893 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
894 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
895 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
896 ; GFX7-SDAG-NEXT: flat_store_dword v[3:4], v1
897 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
898 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
899 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
901 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_1:
902 ; GFX7-GISEL: ; %bb.0:
903 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
904 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
905 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
906 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
907 ; GFX7-GISEL-NEXT: flat_store_dword v[3:4], v1
908 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
909 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
910 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
911 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
913 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_1:
914 ; GFX8-SDAG: ; %bb.0:
915 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
916 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v1
917 ; GFX8-SDAG-NEXT: flat_store_dword v[3:4], v5
918 ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v0, v1
919 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
920 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
921 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
922 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0)
923 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
925 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_1:
926 ; GFX8-GISEL: ; %bb.0:
927 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
928 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v1
929 ; GFX8-GISEL-NEXT: flat_store_dword v[3:4], v5
930 ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1
931 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
932 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2
933 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
934 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
935 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
937 ; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use_1:
939 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
940 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
941 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
942 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
943 ; GFX9-NEXT: global_store_dword v[3:4], v1, off
944 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
945 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
946 ; GFX9-NEXT: s_waitcnt vmcnt(0)
947 ; GFX9-NEXT: s_setpc_b64 s[30:31]
948 %arg0.ext = fpext half %arg0 to float
949 %arg1.ext = fpext half %arg1 to float
950 store float %arg1.ext, ptr addrspace(1) %ptr
951 %arg2.ext = fpext half %arg2 to float
952 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
953 %cast = fptrunc float %med3 to half
; fpext(half) x3 -> fmed3.f32 -> fptrunc to half, where the extended third
; operand (%arg2.ext) has a second use: it is also stored to %ptr.
; In the checks below, GFX8 still emits the f16 min/max sequence and converts
; %arg2 separately for the store, while GFX9 keeps v_med3_f32.
957 define half @fmed3_f32_fpext_f16_multi_use_2(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
958 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_2:
959 ; GFX7-SDAG: ; %bb.0:
960 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
961 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
962 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
963 ; GFX7-SDAG-NEXT: flat_store_dword v[3:4], v2
964 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
965 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
966 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
968 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_2:
969 ; GFX7-GISEL: ; %bb.0:
970 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
971 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
972 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
973 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
974 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
975 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
976 ; GFX7-GISEL-NEXT: flat_store_dword v[3:4], v2
977 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
978 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
980 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_2:
981 ; GFX8-SDAG: ; %bb.0:
982 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
983 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v2
984 ; GFX8-SDAG-NEXT: flat_store_dword v[3:4], v5
985 ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v0, v1
986 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
987 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
988 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
989 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0)
990 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
992 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_2:
993 ; GFX8-GISEL: ; %bb.0:
994 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
995 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v2
996 ; GFX8-GISEL-NEXT: flat_store_dword v[3:4], v5
997 ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1
998 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
999 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2
1000 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
1001 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
1002 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1004 ; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use_2:
1006 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1007 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
1008 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
1009 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
1010 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
1011 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
1012 ; GFX9-NEXT: global_store_dword v[3:4], v2, off
1013 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1014 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1015 %arg0.ext = fpext half %arg0 to float
1016 %arg1.ext = fpext half %arg1 to float
1017 %arg2.ext = fpext half %arg2 to float
1018 store float %arg2.ext, ptr addrspace(1) %ptr
1019 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
1020 %cast = fptrunc float %med3 to half
; All three fmed3.f32 operands are extended from bfloat (not half), and the
; result is truncated to half. The GFX8/GFX9 checks below extend each bfloat
; with a 16-bit left shift and keep the med3 at f32 (v_med3_f32).
1024 define half @fmed3_f32_fpext_bf16(bfloat %arg0, bfloat %arg1, bfloat %arg2) #1 {
1025 ; GFX7-LABEL: fmed3_f32_fpext_bf16:
1027 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1028 ; GFX7-NEXT: v_med3_f32 v0, v0, v1, v2
1029 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1030 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1031 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1033 ; GFX8-LABEL: fmed3_f32_fpext_bf16:
1035 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1036 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1037 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1038 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1039 ; GFX8-NEXT: v_med3_f32 v0, v0, v1, v2
1040 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
1041 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1043 ; GFX9-LABEL: fmed3_f32_fpext_bf16:
1045 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1046 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1047 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1048 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1049 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
1050 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
1051 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1052 %arg0.ext = fpext bfloat %arg0 to float
1053 %arg1.ext = fpext bfloat %arg1 to float
1054 %arg2.ext = fpext bfloat %arg2 to float
1055 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
1056 %cast = fptrunc float %med3 to half
; Mixed source types: operand 0 is extended from bfloat while operands 1 and 2
; are extended from half; the fmed3.f32 result is truncated to half.
; The GFX8/GFX9 checks below keep the med3 at f32 (v_med3_f32).
1060 define half @fmed3_f32_fpext_f16_bf16_0(bfloat %arg0, half %arg1, half %arg2) #1 {
1061 ; GFX7-LABEL: fmed3_f32_fpext_f16_bf16_0:
1063 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1064 ; GFX7-NEXT: v_med3_f32 v0, v0, v1, v2
1065 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1066 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1067 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1069 ; GFX8-LABEL: fmed3_f32_fpext_f16_bf16_0:
1071 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1072 ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
1073 ; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
1074 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1075 ; GFX8-NEXT: v_med3_f32 v0, v0, v1, v2
1076 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
1077 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1079 ; GFX9-LABEL: fmed3_f32_fpext_f16_bf16_0:
1081 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1082 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
1083 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
1084 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1085 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
1086 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
1087 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1088 %arg0.ext = fpext bfloat %arg0 to float
1089 %arg1.ext = fpext half %arg1 to float
1090 %arg2.ext = fpext half %arg2 to float
1091 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
1092 %cast = fptrunc float %med3 to half
; Mixed source types: operand 1 is extended from bfloat while operands 0 and 2
; are extended from half; the fmed3.f32 result is truncated to half.
; The GFX8/GFX9 checks below keep the med3 at f32 (v_med3_f32).
1096 define half @fmed3_f32_fpext_f16_bf16_1(half %arg0, bfloat %arg1, half %arg2) #1 {
1097 ; GFX7-LABEL: fmed3_f32_fpext_f16_bf16_1:
1099 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1100 ; GFX7-NEXT: v_med3_f32 v0, v0, v1, v2
1101 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1102 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1103 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1105 ; GFX8-LABEL: fmed3_f32_fpext_f16_bf16_1:
1107 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1108 ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
1109 ; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
1110 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1111 ; GFX8-NEXT: v_med3_f32 v0, v0, v1, v2
1112 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
1113 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1115 ; GFX9-LABEL: fmed3_f32_fpext_f16_bf16_1:
1117 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1118 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
1119 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
1120 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1121 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
1122 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
1123 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1124 %arg0.ext = fpext half %arg0 to float
1125 %arg1.ext = fpext bfloat %arg1 to float
1126 %arg2.ext = fpext half %arg2 to float
1127 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
1128 %cast = fptrunc float %med3 to half
; Mixed source types: operand 2 is extended from bfloat while operands 0 and 1
; are extended from half; the fmed3.f32 result is truncated to half.
; The GFX8/GFX9 checks below keep the med3 at f32 (v_med3_f32).
1132 define half @fmed3_f32_fpext_f16_bf16_2(half %arg0, half %arg1, bfloat %arg2) #1 {
1133 ; GFX7-LABEL: fmed3_f32_fpext_f16_bf16_2:
1135 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1136 ; GFX7-NEXT: v_med3_f32 v0, v0, v1, v2
1137 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1138 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1139 ; GFX7-NEXT: s_setpc_b64 s[30:31]
1141 ; GFX8-LABEL: fmed3_f32_fpext_f16_bf16_2:
1143 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1144 ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
1145 ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
1146 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1147 ; GFX8-NEXT: v_med3_f32 v0, v0, v1, v2
1148 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
1149 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1151 ; GFX9-LABEL: fmed3_f32_fpext_f16_bf16_2:
1153 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1154 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
1155 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
1156 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1157 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
1158 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
1159 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1160 %arg0.ext = fpext half %arg0 to float
1161 %arg1.ext = fpext half %arg1 to float
1162 %arg2.ext = fpext bfloat %arg2 to float
1163 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
1164 %cast = fptrunc float %med3 to half
; Operand 0 of fmed3.f32 is the float constant 0x41f0000000000000 (2^32,
; materialized as 0x4f800000 in the checks below), which lies outside the
; finite range of half. Operands 1 and 2 are extended from half and the result
; is truncated to half. The checks keep the med3 at f32 on every target.
1168 define half @fmed3_f32_fpext_f16_unrepresentable_k0(half %arg1, half %arg2) #1 {
1169 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
1170 ; GFX7-SDAG: ; %bb.0:
1171 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1172 ; GFX7-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1173 ; GFX7-SDAG-NEXT: v_med3_f32 v0, s4, v0, v1
1174 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1175 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1176 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
1178 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
1179 ; GFX7-GISEL: ; %bb.0:
1180 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1181 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1182 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1183 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1184 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v2, v0, v1
1185 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1186 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
1188 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
1189 ; GFX8-SDAG: ; %bb.0:
1190 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1191 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1192 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1193 ; GFX8-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1194 ; GFX8-SDAG-NEXT: v_med3_f32 v0, s4, v0, v1
1195 ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1196 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
1198 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
1199 ; GFX8-GISEL: ; %bb.0:
1200 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1201 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1202 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1203 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1204 ; GFX8-GISEL-NEXT: v_med3_f32 v0, v2, v0, v1
1205 ; GFX8-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1206 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1208 ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
1209 ; GFX9-SDAG: ; %bb.0:
1210 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1211 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1212 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1213 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1214 ; GFX9-SDAG-NEXT: v_med3_f32 v0, s4, v0, v1
1215 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1216 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1218 ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
1219 ; GFX9-GISEL: ; %bb.0:
1220 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1221 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1222 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1223 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1224 ; GFX9-GISEL-NEXT: v_med3_f32 v0, v2, v0, v1
1225 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1226 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1227 %arg1.ext = fpext half %arg1 to float
1228 %arg2.ext = fpext half %arg2 to float
1229 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x41f0000000000000, float %arg1.ext, float %arg2.ext)
1230 %cast = fptrunc float %med3 to half
; Operand 1 of fmed3.f32 is the float constant 0x41f0000000000000 (2^32,
; materialized as 0x4f800000 in the checks below), which lies outside the
; finite range of half. Operands 0 and 2 are extended from half and the result
; is truncated to half. The checks keep the med3 at f32 on every target.
1234 define half @fmed3_f32_fpext_f16_unrepresentable_k1(half %arg0, half %arg2) #1 {
1235 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
1236 ; GFX7-SDAG: ; %bb.0:
1237 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1238 ; GFX7-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1239 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, s4, v1
1240 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1241 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1242 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
1244 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
1245 ; GFX7-GISEL: ; %bb.0:
1246 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1247 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1248 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1249 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1250 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v2, v1
1251 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1252 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
1254 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
1255 ; GFX8-SDAG: ; %bb.0:
1256 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1257 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1258 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1259 ; GFX8-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1260 ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, s4, v1
1261 ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1262 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
1264 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
1265 ; GFX8-GISEL: ; %bb.0:
1266 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1267 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1268 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1269 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1270 ; GFX8-GISEL-NEXT: v_med3_f32 v0, v0, v2, v1
1271 ; GFX8-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1272 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1274 ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
1275 ; GFX9-SDAG: ; %bb.0:
1276 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1277 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1278 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1279 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1280 ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, s4, v1
1281 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1282 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1284 ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
1285 ; GFX9-GISEL: ; %bb.0:
1286 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1287 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1288 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1289 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1290 ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v2, v1
1291 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1292 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1293 %arg0.ext = fpext half %arg0 to float
1294 %arg2.ext = fpext half %arg2 to float
1295 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float 0x41f0000000000000, float %arg2.ext)
1296 %cast = fptrunc float %med3 to half
; Operand 2 of fmed3.f32 is the float constant 0x41f0000000000000 (2^32,
; materialized as 0x4f800000 in the checks below), which lies outside the
; finite range of half. Operands 0 and 1 are extended from half and the result
; is truncated to half. The checks keep the med3 at f32 on every target.
1300 define half @fmed3_f32_fpext_f16_unrepresentable_k2(half %arg0, half %arg1) #1 {
1301 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
1302 ; GFX7-SDAG: ; %bb.0:
1303 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1304 ; GFX7-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1305 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, s4
1306 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1307 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1308 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
1310 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
1311 ; GFX7-GISEL: ; %bb.0:
1312 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1313 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1314 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1315 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1316 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
1317 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1318 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
1320 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
1321 ; GFX8-SDAG: ; %bb.0:
1322 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1323 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1324 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1325 ; GFX8-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1326 ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, s4
1327 ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1328 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
1330 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
1331 ; GFX8-GISEL: ; %bb.0:
1332 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1333 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1334 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1335 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1336 ; GFX8-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
1337 ; GFX8-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1338 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1340 ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
1341 ; GFX9-SDAG: ; %bb.0:
1342 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1343 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1344 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1345 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1346 ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, s4
1347 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1348 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1350 ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
1351 ; GFX9-GISEL: ; %bb.0:
1352 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1353 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1354 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1355 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1356 ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
1357 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1358 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1359 %arg0.ext = fpext half %arg0 to float
1360 %arg1.ext = fpext half %arg1 to float
1361 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float 0x41f0000000000000)
1362 %cast = fptrunc float %med3 to half
; Attribute group #0 is referenced by the intrinsic declarations at the top of
; the file; it includes memory(none).
1366 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; Attribute group #1 is referenced by the test function definitions; it omits
; memory(none) (some of the tests store to memory).
1367 attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn }
1368 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: