1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; Test no legal f16. Should just keep the cast to f32 and use the f32 med3.
3 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-SDAG %s
4 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-GISEL %s
6 ; Test legal f16, no f16 fmed3. Should expand to min/max sequence
7 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
8 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
10 ; Legal f16 med3. InstCombine ought to shrink the f32 op to f16 so the codegen doesn't really matter for this.
11 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
12 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
15 declare float @llvm.amdgcn.fmed3.f32(float, float, float) #0
16 declare float @llvm.fabs.f32(float) #0
17 declare half @llvm.fabs.f16(half) #0
; Base case: llvm.amdgcn.fmed3.f32 applied to three half arguments that are
; fpext'ed to float, with the result fptrunc'ed back to half.
; Per the checks below, GFX8 lowers this to an f16 min/max sequence, while
; GFX7 and GFX9 keep v_med3_f32 and convert around it.
19 define half @fmed3_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
20 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16:
22 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
24 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
25 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
26 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
28 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16:
29 ; GFX7-GISEL: ; %bb.0:
30 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
32 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
33 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
34 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
35 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
36 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
38 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16:
40 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41 ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v0, v1
42 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
43 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
44 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
45 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
47 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16:
48 ; GFX8-GISEL: ; %bb.0:
49 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50 ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1
51 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
52 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2
53 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
54 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
56 ; GFX9-LABEL: fmed3_f32_fpext_f16:
58 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
60 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
61 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
62 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
63 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
64 ; GFX9-NEXT: s_setpc_b64 s[30:31]
65 %arg0.ext = fpext half %arg0 to float
66 %arg1.ext = fpext half %arg1 to float
67 %arg2.ext = fpext half %arg2 to float
68 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
69 %cast = fptrunc float %med3 to half
; Same pattern as the base fpext/fmed3/fptrunc case, but the fmed3 call
; carries the nsz fast-math flag. The expected code below is the same
; instruction sequence as for the variant without flags.
73 define half @fmed3_f32_fpext_f16_flags(half %arg0, half %arg1, half %arg2) #1 {
74 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_flags:
76 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
78 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
79 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
80 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
82 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_flags:
83 ; GFX7-GISEL: ; %bb.0:
84 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
86 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
87 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
88 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
89 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
90 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
92 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_flags:
94 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95 ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v0, v1
96 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
97 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
98 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
99 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
101 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_flags:
102 ; GFX8-GISEL: ; %bb.0:
103 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104 ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1
105 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
106 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2
107 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
108 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
110 ; GFX9-LABEL: fmed3_f32_fpext_f16_flags:
112 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
114 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
115 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
116 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
117 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
118 ; GFX9-NEXT: s_setpc_b64 s[30:31]
119 %arg0.ext = fpext half %arg0 to float
120 %arg1.ext = fpext half %arg1 to float
121 %arg2.ext = fpext half %arg2 to float
122 %med3 = call nsz float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
123 %cast = fptrunc float %med3 to half
; The f32 fmed3 result has two uses: it is stored to %ptr as a float and also
; fptrunc'ed to half. Per the checks below, every target (including GFX8)
; keeps v_med3_f32 here rather than switching to an f16 sequence.
127 define half @fmed3_f32_fpext_f16_multi_use(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
128 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use:
129 ; GFX7-SDAG: ; %bb.0:
130 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
131 ; GFX7-SDAG-NEXT: v_med3_f32 v1, v0, v1, v2
132 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v1
133 ; GFX7-SDAG-NEXT: flat_store_dword v[3:4], v1
134 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
135 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
136 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
138 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use:
139 ; GFX7-GISEL: ; %bb.0:
140 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
142 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
143 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
144 ; GFX7-GISEL-NEXT: v_med3_f32 v1, v0, v1, v2
145 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1
146 ; GFX7-GISEL-NEXT: flat_store_dword v[3:4], v1
147 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
148 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
150 ; GFX8-LABEL: fmed3_f32_fpext_f16_multi_use:
152 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153 ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
154 ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
155 ; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
156 ; GFX8-NEXT: v_med3_f32 v1, v0, v1, v2
157 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v1
158 ; GFX8-NEXT: flat_store_dword v[3:4], v1
159 ; GFX8-NEXT: s_waitcnt vmcnt(0)
160 ; GFX8-NEXT: s_setpc_b64 s[30:31]
162 ; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use:
164 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
166 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
167 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
168 ; GFX9-NEXT: v_med3_f32 v1, v0, v1, v2
169 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v1
170 ; GFX9-NEXT: global_store_dword v[3:4], v1, off
171 ; GFX9-NEXT: s_waitcnt vmcnt(0)
172 ; GFX9-NEXT: s_setpc_b64 s[30:31]
173 %arg0.ext = fpext half %arg0 to float
174 %arg1.ext = fpext half %arg1 to float
175 %arg2.ext = fpext half %arg2 to float
176 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
177 store float %med3, ptr addrspace(1) %ptr
178 %cast = fptrunc float %med3 to half
; Constant in operand 0: fmed3.f32(2.0, fpext(%arg1), fpext(%arg2)), result
; fptrunc'ed to half. Per the checks below, GFX8 emits f16 min/max using the
; inline constant 2.0; GFX7 and GFX9 keep v_med3_f32 with 2.0 as an operand.
182 define half @fmed3_f32_fpext_f16_k0(half %arg1, half %arg2) #1 {
183 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k0:
184 ; GFX7-SDAG: ; %bb.0:
185 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186 ; GFX7-SDAG-NEXT: v_med3_f32 v0, 2.0, v0, v1
187 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
188 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
189 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
191 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k0:
192 ; GFX7-GISEL: ; %bb.0:
193 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
195 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
196 ; GFX7-GISEL-NEXT: v_med3_f32 v0, 2.0, v0, v1
197 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
198 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
200 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_k0:
201 ; GFX8-SDAG: ; %bb.0:
202 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203 ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, 2.0, v0
204 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, 2.0, v0
205 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
206 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v2, v0
207 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
209 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_k0:
210 ; GFX8-GISEL: ; %bb.0:
211 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212 ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, 2.0, v0
213 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, 2.0, v0
214 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v2, v1
215 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
216 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
218 ; GFX9-LABEL: fmed3_f32_fpext_f16_k0:
220 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
221 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
222 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
223 ; GFX9-NEXT: v_med3_f32 v0, 2.0, v0, v1
224 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
225 ; GFX9-NEXT: s_setpc_b64 s[30:31]
226 %arg1.ext = fpext half %arg1 to float
227 %arg2.ext = fpext half %arg2 to float
228 %med3 = call float @llvm.amdgcn.fmed3.f32(float 2.0, float %arg1.ext, float %arg2.ext)
229 %cast = fptrunc float %med3 to half
; Constant in operand 1: fmed3.f32(fpext(%arg0), 2.0, fpext(%arg2)), result
; fptrunc'ed to half. Per the checks below, GFX8 emits f16 min/max using the
; inline constant 2.0; GFX7 and GFX9 keep v_med3_f32 with 2.0 as an operand.
233 define half @fmed3_f32_fpext_f16_k1(half %arg0, half %arg2) #1 {
234 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k1:
235 ; GFX7-SDAG: ; %bb.0:
236 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, 2.0, v1
238 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
239 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
240 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
242 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k1:
243 ; GFX7-GISEL: ; %bb.0:
244 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
246 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
247 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, 2.0, v1
248 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
249 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
251 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_k1:
252 ; GFX8-SDAG: ; %bb.0:
253 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254 ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, 2.0, v0
255 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, 2.0, v0
256 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
257 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v2, v0
258 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
260 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_k1:
261 ; GFX8-GISEL: ; %bb.0:
262 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263 ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, 2.0, v0
264 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, 2.0, v0
265 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v2, v1
266 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
267 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
269 ; GFX9-LABEL: fmed3_f32_fpext_f16_k1:
271 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
273 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
274 ; GFX9-NEXT: v_med3_f32 v0, v0, 2.0, v1
275 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
276 ; GFX9-NEXT: s_setpc_b64 s[30:31]
277 %arg0.ext = fpext half %arg0 to float
278 %arg2.ext = fpext half %arg2 to float
279 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float 2.0, float %arg2.ext)
280 %cast = fptrunc float %med3 to half
; Constant in operand 2: fmed3.f32(fpext(%arg0), fpext(%arg1), 2.0), result
; fptrunc'ed to half. Per the checks below, GFX8 emits f16 min/max using the
; inline constant 2.0; GFX7 and GFX9 keep v_med3_f32 with 2.0 as an operand.
284 define half @fmed3_f32_fpext_f16_k2(half %arg0, half %arg1) #1 {
285 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k2:
286 ; GFX7-SDAG: ; %bb.0:
287 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
288 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, 2.0
289 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
290 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
291 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
293 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k2:
294 ; GFX7-GISEL: ; %bb.0:
295 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
297 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
298 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, 2.0
299 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
300 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
302 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_k2:
303 ; GFX8-SDAG: ; %bb.0:
304 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305 ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, v0, v1
306 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
307 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, 2.0, v0
308 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v2, v0
309 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
311 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_k2:
312 ; GFX8-GISEL: ; %bb.0:
313 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314 ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, v0, v1
315 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
316 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, 2.0, v2
317 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
318 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
320 ; GFX9-LABEL: fmed3_f32_fpext_f16_k2:
322 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
323 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
324 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
325 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, 2.0
326 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
327 ; GFX9-NEXT: s_setpc_b64 s[30:31]
328 %arg0.ext = fpext half %arg0 to float
329 %arg1.ext = fpext half %arg1 to float
330 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float 2.0)
331 %cast = fptrunc float %med3 to half
; Constants in operands 0 and 1: fmed3.f32(0.0, 16.0, fpext(%arg2)), result
; fptrunc'ed to half. In the checks below, 16.0 is materialized with
; v_mov_b32 as 0x41800000 (its f32 encoding) ahead of v_med3_f32 on GFX7 and
; GFX9, and as 0x4c00 (its f16 encoding) ahead of the f16 min/max sequence on
; GFX8.
335 define half @fmed3_f32_fpext_f16_k0_k1(half %arg2) #1 {
336 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k1:
337 ; GFX7-SDAG: ; %bb.0:
338 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339 ; GFX7-SDAG-NEXT: v_mov_b32_e32 v1, 0x41800000
340 ; GFX7-SDAG-NEXT: v_med3_f32 v0, 0, v1, v0
341 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
342 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
343 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
345 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k0_k1:
346 ; GFX7-GISEL: ; %bb.0:
347 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
348 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
349 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, 0x41800000
350 ; GFX7-GISEL-NEXT: v_med3_f32 v0, 0, v1, v0
351 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
352 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
354 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k1:
355 ; GFX8-SDAG: ; %bb.0:
356 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, 0x4c00
358 ; GFX8-SDAG-NEXT: v_max_f16_e32 v2, 0, v1
359 ; GFX8-SDAG-NEXT: v_min_f16_e32 v1, 0, v1
360 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v1, v0
361 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v2, v0
362 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
364 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_k0_k1:
365 ; GFX8-GISEL: ; %bb.0:
366 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
367 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0x4c00
368 ; GFX8-GISEL-NEXT: v_min_f16_e32 v2, 0, v1
369 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, 0, v1
370 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v2, v0
371 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v1, v0
372 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
374 ; GFX9-LABEL: fmed3_f32_fpext_f16_k0_k1:
376 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
378 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x41800000
379 ; GFX9-NEXT: v_med3_f32 v0, 0, v1, v0
380 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
381 ; GFX9-NEXT: s_setpc_b64 s[30:31]
382 %arg2.ext = fpext half %arg2 to float
383 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 16.0, float %arg2.ext)
384 %cast = fptrunc float %med3 to half
; Constants in operands 0 and 2: fmed3.f32(0.0, fpext(%arg1), 2.0), result
; fptrunc'ed to half. Both constants appear directly as instruction operands
; in the checks below (no v_mov needed), in v_med3_f32 on GFX7/GFX9 and in
; the f16 min/max sequence on GFX8.
388 define half @fmed3_f32_fpext_f16_k0_k2(half %arg1) #1 {
389 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k2:
390 ; GFX7-SDAG: ; %bb.0:
391 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
392 ; GFX7-SDAG-NEXT: v_med3_f32 v0, 0, v0, 2.0
393 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
394 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
395 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
397 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_k0_k2:
398 ; GFX7-GISEL: ; %bb.0:
399 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
400 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
401 ; GFX7-GISEL-NEXT: v_med3_f32 v0, 0, v0, 2.0
402 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
403 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
405 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_k0_k2:
406 ; GFX8-SDAG: ; %bb.0:
407 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
408 ; GFX8-SDAG-NEXT: v_max_f16_e32 v1, 0, v0
409 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, 0, v0
410 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, 2.0, v0
411 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v1, v0
412 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
414 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_k0_k2:
415 ; GFX8-GISEL: ; %bb.0:
416 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
417 ; GFX8-GISEL-NEXT: v_min_f16_e32 v1, 0, v0
418 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, 0, v0
419 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, 2.0, v1
420 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
421 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
423 ; GFX9-LABEL: fmed3_f32_fpext_f16_k0_k2:
425 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
426 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
427 ; GFX9-NEXT: v_med3_f32 v0, 0, v0, 2.0
428 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
429 ; GFX9-NEXT: s_setpc_b64 s[30:31]
430 %arg1.ext = fpext half %arg1 to float
431 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %arg1.ext, float 2.0)
432 %cast = fptrunc float %med3 to half
; fabs applied to each half argument BEFORE the fpext (llvm.fabs.f16), then
; fmed3.f32 and fptrunc to half. Per the checks below, GFX8 folds the |x|
; source modifiers into the f16 min/max sequence, while GFX7 and GFX9 fold
; them into the v_cvt_f32_f16 conversions feeding v_med3_f32.
436 define half @fmed3_f32_fpext_f16_fabs(half %arg0, half %arg1, half %arg2) #1 {
437 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fabs:
438 ; GFX7-SDAG: ; %bb.0:
439 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
440 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
441 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
442 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
443 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
444 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
445 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v2, |v2|
446 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
447 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
448 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
449 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
451 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fabs:
452 ; GFX7-GISEL: ; %bb.0:
453 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
454 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0|
455 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, |v1|
456 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v2, |v2|
457 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
458 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
459 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
461 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_fabs:
462 ; GFX8-SDAG: ; %bb.0:
463 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
464 ; GFX8-SDAG-NEXT: v_max_f16_e64 v3, |v0|, |v1|
465 ; GFX8-SDAG-NEXT: v_min_f16_e64 v0, |v0|, |v1|
466 ; GFX8-SDAG-NEXT: v_max_f16_e64 v0, v0, |v2|
467 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
468 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
470 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_fabs:
471 ; GFX8-GISEL: ; %bb.0:
472 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
473 ; GFX8-GISEL-NEXT: v_min_f16_e64 v3, |v0|, |v1|
474 ; GFX8-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v1|
475 ; GFX8-GISEL-NEXT: v_max_f16_e64 v1, v3, |v2|
476 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
477 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
479 ; GFX9-LABEL: fmed3_f32_fpext_f16_fabs:
481 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
482 ; GFX9-NEXT: v_cvt_f32_f16_e64 v0, |v0|
483 ; GFX9-NEXT: v_cvt_f32_f16_e64 v1, |v1|
484 ; GFX9-NEXT: v_cvt_f32_f16_e64 v2, |v2|
485 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
486 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
487 ; GFX9-NEXT: s_setpc_b64 s[30:31]
488 %fabs.arg0 = call half @llvm.fabs.f16(half %arg0)
489 %fabs.arg1 = call half @llvm.fabs.f16(half %arg1)
490 %fabs.arg2 = call half @llvm.fabs.f16(half %arg2)
491 %arg0.ext = fpext half %fabs.arg0 to float
492 %arg1.ext = fpext half %fabs.arg1 to float
493 %arg2.ext = fpext half %fabs.arg2 to float
494 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
495 %cast = fptrunc float %med3 to half
; fabs applied to each operand AFTER the fpext (llvm.fabs.f32 on the extended
; values), then fmed3.f32 and fptrunc to half. Per the checks below, all
; targets (including GFX8) keep v_med3_f32 with |x| source modifiers; there
; is no f16 min/max expansion for this form.
499 define half @fmed3_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
500 ; GFX7-SDAG-LABEL: fmed3_fabs_f32_fpext_f16:
501 ; GFX7-SDAG: ; %bb.0:
502 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
503 ; GFX7-SDAG-NEXT: v_med3_f32 v0, |v0|, |v1|, |v2|
504 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
505 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
506 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
508 ; GFX7-GISEL-LABEL: fmed3_fabs_f32_fpext_f16:
509 ; GFX7-GISEL: ; %bb.0:
510 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
511 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
512 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
513 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
514 ; GFX7-GISEL-NEXT: v_med3_f32 v0, |v0|, |v1|, |v2|
515 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
516 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
518 ; GFX8-LABEL: fmed3_fabs_f32_fpext_f16:
520 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521 ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
522 ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
523 ; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
524 ; GFX8-NEXT: v_med3_f32 v0, |v0|, |v1|, |v2|
525 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
526 ; GFX8-NEXT: s_setpc_b64 s[30:31]
528 ; GFX9-LABEL: fmed3_fabs_f32_fpext_f16:
530 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
531 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
532 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
533 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
534 ; GFX9-NEXT: v_med3_f32 v0, |v0|, |v1|, |v2|
535 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
536 ; GFX9-NEXT: s_setpc_b64 s[30:31]
537 %arg0.ext = fpext half %arg0 to float
538 %arg1.ext = fpext half %arg1 to float
539 %arg2.ext = fpext half %arg2 to float
540 %fabs.ext.arg0 = call float @llvm.fabs.f32(float %arg0.ext)
541 %fabs.ext.arg1 = call float @llvm.fabs.f32(float %arg1.ext)
542 %fabs.ext.arg2 = call float @llvm.fabs.f32(float %arg2.ext)
543 %med3 = call float @llvm.amdgcn.fmed3.f32(float %fabs.ext.arg0, float %fabs.ext.arg1, float %fabs.ext.arg2)
544 %cast = fptrunc float %med3 to half
; fneg applied to each half argument BEFORE the fpext, then fmed3.f32 and
; fptrunc to half. Per the checks below, GFX8 folds the negations into the
; f16 min/max source modifiers; GFX7-SDAG folds them into v_med3_f32, and
; GFX7-GISEL/GFX9 fold them into the v_cvt_f32_f16 conversions.
548 define half @fmed3_f32_fpext_f16_fneg(half %arg0, half %arg1, half %arg2) #1 {
549 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fneg:
550 ; GFX7-SDAG: ; %bb.0:
551 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
552 ; GFX7-SDAG-NEXT: v_med3_f32 v0, -v0, -v1, -v2
553 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
554 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
555 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
557 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fneg:
558 ; GFX7-GISEL: ; %bb.0:
559 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
560 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0
561 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -v1
562 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v2, -v2
563 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
564 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
565 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
567 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_fneg:
568 ; GFX8-SDAG: ; %bb.0:
569 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
570 ; GFX8-SDAG-NEXT: v_max_f16_e64 v3, -v0, -v1
571 ; GFX8-SDAG-NEXT: v_min_f16_e64 v0, -v0, -v1
572 ; GFX8-SDAG-NEXT: v_max_f16_e64 v0, v0, -v2
573 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
574 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
576 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_fneg:
577 ; GFX8-GISEL: ; %bb.0:
578 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
579 ; GFX8-GISEL-NEXT: v_min_f16_e64 v3, -v0, -v1
580 ; GFX8-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v1
581 ; GFX8-GISEL-NEXT: v_max_f16_e64 v1, v3, -v2
582 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
583 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
585 ; GFX9-LABEL: fmed3_f32_fpext_f16_fneg:
587 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
588 ; GFX9-NEXT: v_cvt_f32_f16_e64 v0, -v0
589 ; GFX9-NEXT: v_cvt_f32_f16_e64 v1, -v1
590 ; GFX9-NEXT: v_cvt_f32_f16_e64 v2, -v2
591 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
592 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
593 ; GFX9-NEXT: s_setpc_b64 s[30:31]
594 %fneg.arg0 = fneg half %arg0
595 %fneg.arg1 = fneg half %arg1
596 %fneg.arg2 = fneg half %arg2
597 %arg0.ext = fpext half %fneg.arg0 to float
598 %arg1.ext = fpext half %fneg.arg1 to float
599 %arg2.ext = fpext half %fneg.arg2 to float
600 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
601 %cast = fptrunc float %med3 to half
; fneg applied to each operand AFTER the fpext (on the float values), then
; fmed3.f32 and fptrunc to half. Per the checks below, all targets (including
; GFX8) keep v_med3_f32 with negated source operands; there is no f16
; min/max expansion for this form.
605 define half @fmed3_fneg_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
606 ; GFX7-SDAG-LABEL: fmed3_fneg_f32_fpext_f16:
607 ; GFX7-SDAG: ; %bb.0:
608 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
609 ; GFX7-SDAG-NEXT: v_med3_f32 v0, -v0, -v1, -v2
610 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
611 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
612 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
614 ; GFX7-GISEL-LABEL: fmed3_fneg_f32_fpext_f16:
615 ; GFX7-GISEL: ; %bb.0:
616 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
617 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
618 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
619 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
620 ; GFX7-GISEL-NEXT: v_med3_f32 v0, -v0, -v1, -v2
621 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
622 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
624 ; GFX8-LABEL: fmed3_fneg_f32_fpext_f16:
626 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
627 ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
628 ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
629 ; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
630 ; GFX8-NEXT: v_med3_f32 v0, -v0, -v1, -v2
631 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
632 ; GFX8-NEXT: s_setpc_b64 s[30:31]
634 ; GFX9-LABEL: fmed3_fneg_f32_fpext_f16:
636 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
637 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
638 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
639 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
640 ; GFX9-NEXT: v_med3_f32 v0, -v0, -v1, -v2
641 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
642 ; GFX9-NEXT: s_setpc_b64 s[30:31]
643 %arg0.ext = fpext half %arg0 to float
644 %arg1.ext = fpext half %arg1 to float
645 %arg2.ext = fpext half %arg2 to float
646 %fneg.ext.arg0 = fneg float %arg0.ext
647 %fneg.ext.arg1 = fneg float %arg1.ext
648 %fneg.ext.arg2 = fneg float %arg2.ext
649 %med3 = call float @llvm.amdgcn.fmed3.f32(float %fneg.ext.arg0, float %fneg.ext.arg1, float %fneg.ext.arg2)
650 %cast = fptrunc float %med3 to half
; fneg(fabs(x)) applied to each half argument BEFORE the fpext, then
; fmed3.f32 and fptrunc to half. Per the checks below, GFX8 folds -|x| into
; the f16 min/max source modifiers; GFX7-GISEL and GFX9 fold -|x| into the
; v_cvt_f32_f16 conversions; GFX7-SDAG splits it, with |x| on the conversion
; and the negation on v_med3_f32.
654 define half @fmed3_f32_fpext_f16_fneg_fabs(half %arg0, half %arg1, half %arg2) #1 {
655 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fneg_fabs:
656 ; GFX7-SDAG: ; %bb.0:
657 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
659 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
660 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
661 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0|
662 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1|
663 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e64 v2, |v2|
664 ; GFX7-SDAG-NEXT: v_med3_f32 v0, -v0, -v1, -v2
665 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
666 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
667 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
669 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fneg_fabs:
670 ; GFX7-GISEL: ; %bb.0:
671 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
672 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
673 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v1, -|v1|
674 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e64 v2, -|v2|
675 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
676 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
677 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
679 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_fneg_fabs:
680 ; GFX8-SDAG: ; %bb.0:
681 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
682 ; GFX8-SDAG-NEXT: v_max_f16_e64 v3, -|v0|, -|v1|
683 ; GFX8-SDAG-NEXT: v_min_f16_e64 v0, -|v0|, -|v1|
684 ; GFX8-SDAG-NEXT: v_max_f16_e64 v0, v0, -|v2|
685 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
686 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
688 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_fneg_fabs:
689 ; GFX8-GISEL: ; %bb.0:
690 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691 ; GFX8-GISEL-NEXT: v_min_f16_e64 v3, -|v0|, -|v1|
692 ; GFX8-GISEL-NEXT: v_max_f16_e64 v0, -|v0|, -|v1|
693 ; GFX8-GISEL-NEXT: v_max_f16_e64 v1, v3, -|v2|
694 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
695 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
697 ; GFX9-LABEL: fmed3_f32_fpext_f16_fneg_fabs:
699 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
700 ; GFX9-NEXT: v_cvt_f32_f16_e64 v0, -|v0|
701 ; GFX9-NEXT: v_cvt_f32_f16_e64 v1, -|v1|
702 ; GFX9-NEXT: v_cvt_f32_f16_e64 v2, -|v2|
703 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
704 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
705 ; GFX9-NEXT: s_setpc_b64 s[30:31]
706 %fabs.arg0 = call half @llvm.fabs.f16(half %arg0)
707 %fabs.arg1 = call half @llvm.fabs.f16(half %arg1)
708 %fabs.arg2 = call half @llvm.fabs.f16(half %arg2)
709 %fneg.fabs.arg0 = fneg half %fabs.arg0
710 %fneg.fabs.arg1 = fneg half %fabs.arg1
711 %fneg.fabs.arg2 = fneg half %fabs.arg2
712 %arg0.ext = fpext half %fneg.fabs.arg0 to float
713 %arg1.ext = fpext half %fneg.fabs.arg1 to float
714 %arg2.ext = fpext half %fneg.fabs.arg2 to float
715 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
716 %cast = fptrunc float %med3 to half
; fneg(fabs(x)) applied to each operand AFTER the fpext (on the float
; values), then fmed3.f32 and fptrunc to half. Per the checks below, all
; targets (including GFX8) keep v_med3_f32 with -|x| source modifiers; there
; is no f16 min/max expansion for this form.
720 define half @fmed3_fneg_fabs_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
721 ; GFX7-SDAG-LABEL: fmed3_fneg_fabs_f32_fpext_f16:
722 ; GFX7-SDAG: ; %bb.0:
723 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
724 ; GFX7-SDAG-NEXT: v_med3_f32 v0, -|v0|, -|v1|, -|v2|
725 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
726 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
727 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
729 ; GFX7-GISEL-LABEL: fmed3_fneg_fabs_f32_fpext_f16:
730 ; GFX7-GISEL: ; %bb.0:
731 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
732 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
733 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
734 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
735 ; GFX7-GISEL-NEXT: v_med3_f32 v0, -|v0|, -|v1|, -|v2|
736 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
737 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
739 ; GFX8-LABEL: fmed3_fneg_fabs_f32_fpext_f16:
741 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
742 ; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
743 ; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
744 ; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
745 ; GFX8-NEXT: v_med3_f32 v0, -|v0|, -|v1|, -|v2|
746 ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
747 ; GFX8-NEXT: s_setpc_b64 s[30:31]
749 ; GFX9-LABEL: fmed3_fneg_fabs_f32_fpext_f16:
751 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
752 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
753 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
754 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
755 ; GFX9-NEXT: v_med3_f32 v0, -|v0|, -|v1|, -|v2|
756 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
757 ; GFX9-NEXT: s_setpc_b64 s[30:31]
758 %arg0.ext = fpext half %arg0 to float
759 %arg1.ext = fpext half %arg1 to float
760 %arg2.ext = fpext half %arg2 to float
761 %fabs.ext.arg0 = call float @llvm.fabs.f32(float %arg0.ext)
762 %fabs.ext.arg1 = call float @llvm.fabs.f32(float %arg1.ext)
763 %fabs.ext.arg2 = call float @llvm.fabs.f32(float %arg2.ext)
764 %fneg.fabs.ext.arg0 = fneg float %fabs.ext.arg0
765 %fneg.fabs.ext.arg1 = fneg float %fabs.ext.arg1
766 %fneg.fabs.ext.arg2 = fneg float %fabs.ext.arg2
767 %med3 = call float @llvm.amdgcn.fmed3.f32(float %fneg.fabs.ext.arg0, float %fneg.fabs.ext.arg1, float %fneg.fabs.ext.arg2)
768 %cast = fptrunc float %med3 to half
771 ; --------------------------------------------------------------------------------
773 ; --------------------------------------------------------------------------------
; fmed3.f32 of three half operands widened with fpext, where the result is
; truncated to bfloat instead of half. The SelectionDAG checks for all three
; targets keep v_med3_f32 and then mask the result with 0xffff0000.
; NOTE(review): the GlobalISel checks finish with f16 instructions
; (v_cvt_f16_f32 on kaveri and gfx900, the v_min_f16/v_max_f16 sequence on
; fiji) although the return type is bfloat; presumably a GlobalISel bfloat
; limitation -- confirm before treating those checks as the desired output.
775 define bfloat @fmed3_f32_fpext_f16_fptrunc_bf16(half %arg0, half %arg1, half %arg2) #1 {
776 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
777 ; GFX7-SDAG: ; %bb.0:
778 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
779 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
780 ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
781 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
783 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
784 ; GFX7-GISEL: ; %bb.0:
785 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
786 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
787 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
788 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
789 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
790 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
791 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
793 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
794 ; GFX8-SDAG: ; %bb.0:
795 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
796 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
797 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
798 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
799 ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
800 ; GFX8-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
801 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
803 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
804 ; GFX8-GISEL: ; %bb.0:
805 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
806 ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1
807 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
808 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2
809 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
810 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
812 ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
813 ; GFX9-SDAG: ; %bb.0:
814 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
815 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
816 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
817 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
818 ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
819 ; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
820 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
822 ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_fptrunc_bf16:
823 ; GFX9-GISEL: ; %bb.0:
824 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
825 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
826 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
827 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
828 ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
829 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
830 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
831 %arg0.ext = fpext half %arg0 to float
832 %arg1.ext = fpext half %arg1 to float
833 %arg2.ext = fpext half %arg2 to float
834 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
835 %cast = fptrunc float %med3 to bfloat
; half -> float -> fmed3.f32 -> half, where the extended %arg0 is also stored
; to %ptr, giving that fpext a second user besides the fmed3 call.
; Per the checks below, fiji still emits the f16 min/max sequence and performs
; a separate v_cvt_f32_f16 only for the stored value, while gfx900 converts all
; three operands and keeps v_med3_f32.
839 define half @fmed3_f32_fpext_f16_multi_use_0(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
840 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_0:
841 ; GFX7-SDAG: ; %bb.0:
842 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
843 ; GFX7-SDAG-NEXT: v_med3_f32 v1, v0, v1, v2
844 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
845 ; GFX7-SDAG-NEXT: flat_store_dword v[3:4], v0
846 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
847 ; GFX7-SDAG-NEXT: v_mov_b32_e32 v0, v1
848 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
849 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
851 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_0:
852 ; GFX7-GISEL: ; %bb.0:
853 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
854 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v0
855 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v1
856 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v2
857 ; GFX7-GISEL-NEXT: flat_store_dword v[3:4], v5
858 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v5, v0, v1
859 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
860 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
861 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
863 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_0:
864 ; GFX8-SDAG: ; %bb.0:
865 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
866 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v0
867 ; GFX8-SDAG-NEXT: flat_store_dword v[3:4], v5
868 ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v0, v1
869 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
870 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
871 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
872 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0)
873 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
875 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_0:
876 ; GFX8-GISEL: ; %bb.0:
877 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
878 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v0
879 ; GFX8-GISEL-NEXT: flat_store_dword v[3:4], v5
880 ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1
881 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
882 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2
883 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
884 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
885 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
887 ; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use_0:
889 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
890 ; GFX9-NEXT: v_cvt_f32_f16_e32 v5, v0
891 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v1
892 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v2
893 ; GFX9-NEXT: global_store_dword v[3:4], v5, off
894 ; GFX9-NEXT: v_med3_f32 v0, v5, v0, v1
895 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
896 ; GFX9-NEXT: s_waitcnt vmcnt(0)
897 ; GFX9-NEXT: s_setpc_b64 s[30:31]
898 %arg0.ext = fpext half %arg0 to float
899 store float %arg0.ext, ptr addrspace(1) %ptr
900 %arg1.ext = fpext half %arg1 to float
901 %arg2.ext = fpext half %arg2 to float
902 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
903 %cast = fptrunc float %med3 to half
; half -> float -> fmed3.f32 -> half, where the extended %arg1 is also stored
; to %ptr, giving that fpext a second user besides the fmed3 call.
; Per the checks below, fiji still emits the f16 min/max sequence and performs
; a separate v_cvt_f32_f16 only for the stored value, while gfx900 converts all
; three operands and keeps v_med3_f32.
907 define half @fmed3_f32_fpext_f16_multi_use_1(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
908 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_1:
909 ; GFX7-SDAG: ; %bb.0:
910 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
911 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
912 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
913 ; GFX7-SDAG-NEXT: flat_store_dword v[3:4], v1
914 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
915 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
916 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
918 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_1:
919 ; GFX7-GISEL: ; %bb.0:
920 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
921 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
922 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
923 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
924 ; GFX7-GISEL-NEXT: flat_store_dword v[3:4], v1
925 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
926 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
927 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
928 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
930 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_1:
931 ; GFX8-SDAG: ; %bb.0:
932 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
933 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v1
934 ; GFX8-SDAG-NEXT: flat_store_dword v[3:4], v5
935 ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v0, v1
936 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
937 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
938 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
939 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0)
940 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
942 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_1:
943 ; GFX8-GISEL: ; %bb.0:
944 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
945 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v1
946 ; GFX8-GISEL-NEXT: flat_store_dword v[3:4], v5
947 ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1
948 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
949 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2
950 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
951 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
952 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
954 ; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use_1:
956 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
957 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
958 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
959 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
960 ; GFX9-NEXT: global_store_dword v[3:4], v1, off
961 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
962 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
963 ; GFX9-NEXT: s_waitcnt vmcnt(0)
964 ; GFX9-NEXT: s_setpc_b64 s[30:31]
965 %arg0.ext = fpext half %arg0 to float
966 %arg1.ext = fpext half %arg1 to float
967 store float %arg1.ext, ptr addrspace(1) %ptr
968 %arg2.ext = fpext half %arg2 to float
969 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
970 %cast = fptrunc float %med3 to half
; half -> float -> fmed3.f32 -> half, where the extended %arg2 is also stored
; to %ptr, giving that fpext a second user besides the fmed3 call.
; Per the checks below, fiji still emits the f16 min/max sequence and performs
; a separate v_cvt_f32_f16 only for the stored value, while gfx900 converts all
; three operands and keeps v_med3_f32.
974 define half @fmed3_f32_fpext_f16_multi_use_2(half %arg0, half %arg1, half %arg2, ptr addrspace(1) %ptr) #1 {
975 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_2:
976 ; GFX7-SDAG: ; %bb.0:
977 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
978 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
979 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
980 ; GFX7-SDAG-NEXT: flat_store_dword v[3:4], v2
981 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
982 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
983 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
985 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_2:
986 ; GFX7-GISEL: ; %bb.0:
987 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
988 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
989 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
990 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
991 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
992 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
993 ; GFX7-GISEL-NEXT: flat_store_dword v[3:4], v2
994 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
995 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
997 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_multi_use_2:
998 ; GFX8-SDAG: ; %bb.0:
999 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1000 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v5, v2
1001 ; GFX8-SDAG-NEXT: flat_store_dword v[3:4], v5
1002 ; GFX8-SDAG-NEXT: v_max_f16_e32 v3, v0, v1
1003 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
1004 ; GFX8-SDAG-NEXT: v_max_f16_e32 v0, v0, v2
1005 ; GFX8-SDAG-NEXT: v_min_f16_e32 v0, v3, v0
1006 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0)
1007 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
1009 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_multi_use_2:
1010 ; GFX8-GISEL: ; %bb.0:
1011 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1012 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v5, v2
1013 ; GFX8-GISEL-NEXT: flat_store_dword v[3:4], v5
1014 ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1
1015 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
1016 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2
1017 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
1018 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
1019 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1021 ; GFX9-LABEL: fmed3_f32_fpext_f16_multi_use_2:
1023 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1024 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
1025 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
1026 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
1027 ; GFX9-NEXT: v_med3_f32 v0, v0, v1, v2
1028 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
1029 ; GFX9-NEXT: global_store_dword v[3:4], v2, off
1030 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1031 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1032 %arg0.ext = fpext half %arg0 to float
1033 %arg1.ext = fpext half %arg1 to float
1034 %arg2.ext = fpext half %arg2 to float
1035 store float %arg2.ext, ptr addrspace(1) %ptr
1036 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
1037 %cast = fptrunc float %med3 to half
; fmed3.f32 where all three operands are extended from bfloat rather than half,
; with the result truncated to half. The SelectionDAG checks for every target
; feed the incoming registers straight into v_med3_f32 and convert only the
; result with v_cvt_f16_f32.
; NOTE(review): the GlobalISel checks use f16 conversions on the bfloat
; operands (and the f16 min/max sequence on fiji); presumably a GlobalISel
; bfloat limitation -- confirm before treating those checks as desired output.
1041 define half @fmed3_f32_fpext_bf16(bfloat %arg0, bfloat %arg1, bfloat %arg2) #1 {
1042 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_bf16:
1043 ; GFX7-SDAG: ; %bb.0:
1044 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1045 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
1046 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1047 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1048 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
1050 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_bf16:
1051 ; GFX7-GISEL: ; %bb.0:
1052 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1053 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1054 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1055 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
1056 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
1057 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1058 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
1060 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_bf16:
1061 ; GFX8-SDAG: ; %bb.0:
1062 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1063 ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
1064 ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1065 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
1067 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_bf16:
1068 ; GFX8-GISEL: ; %bb.0:
1069 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1070 ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1
1071 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
1072 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2
1073 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
1074 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1076 ; GFX9-SDAG-LABEL: fmed3_f32_fpext_bf16:
1077 ; GFX9-SDAG: ; %bb.0:
1078 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1079 ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
1080 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1081 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1083 ; GFX9-GISEL-LABEL: fmed3_f32_fpext_bf16:
1084 ; GFX9-GISEL: ; %bb.0:
1085 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1086 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1087 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1088 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
1089 ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
1090 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1091 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1092 %arg0.ext = fpext bfloat %arg0 to float
1093 %arg1.ext = fpext bfloat %arg1 to float
1094 %arg2.ext = fpext bfloat %arg2 to float
1095 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
1096 %cast = fptrunc float %med3 to half
; Mixed source types: operand 0 is extended from bfloat, operands 1 and 2 from
; half. The fiji and gfx900 SelectionDAG checks convert only v1 and v2 with
; v_cvt_f32_f16 and keep the operation as v_med3_f32.
; NOTE(review): the GlobalISel checks apply f16 instructions to the bfloat
; operand as well; presumably a GlobalISel bfloat limitation -- confirm.
1100 define half @fmed3_f32_fpext_f16_bf16_0(bfloat %arg0, half %arg1, half %arg2) #1 {
1101 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_0:
1102 ; GFX7-SDAG: ; %bb.0:
1103 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1104 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
1105 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1106 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1107 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
1109 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_0:
1110 ; GFX7-GISEL: ; %bb.0:
1111 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1112 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1113 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1114 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
1115 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
1116 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1117 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
1119 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_0:
1120 ; GFX8-SDAG: ; %bb.0:
1121 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1122 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1123 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
1124 ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
1125 ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1126 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
1128 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_0:
1129 ; GFX8-GISEL: ; %bb.0:
1130 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1131 ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1
1132 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
1133 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2
1134 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
1135 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1137 ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_0:
1138 ; GFX9-SDAG: ; %bb.0:
1139 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1140 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1141 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
1142 ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
1143 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1144 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1146 ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_0:
1147 ; GFX9-GISEL: ; %bb.0:
1148 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1149 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1150 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1151 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
1152 ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
1153 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1154 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1155 %arg0.ext = fpext bfloat %arg0 to float
1156 %arg1.ext = fpext half %arg1 to float
1157 %arg2.ext = fpext half %arg2 to float
1158 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
1159 %cast = fptrunc float %med3 to half
; Mixed source types: operand 1 is extended from bfloat, operands 0 and 2 from
; half. The fiji and gfx900 SelectionDAG checks convert only v0 and v2 with
; v_cvt_f32_f16 and keep the operation as v_med3_f32.
; NOTE(review): the GlobalISel checks apply f16 instructions to the bfloat
; operand as well; presumably a GlobalISel bfloat limitation -- confirm.
1163 define half @fmed3_f32_fpext_f16_bf16_1(half %arg0, bfloat %arg1, half %arg2) #1 {
1164 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_1:
1165 ; GFX7-SDAG: ; %bb.0:
1166 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1167 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
1168 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1169 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1170 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
1172 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_1:
1173 ; GFX7-GISEL: ; %bb.0:
1174 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1175 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1176 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1177 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
1178 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
1179 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1180 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
1182 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_1:
1183 ; GFX8-SDAG: ; %bb.0:
1184 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1185 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1186 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
1187 ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
1188 ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1189 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
1191 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_1:
1192 ; GFX8-GISEL: ; %bb.0:
1193 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1194 ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1
1195 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
1196 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2
1197 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
1198 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1200 ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_1:
1201 ; GFX9-SDAG: ; %bb.0:
1202 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1203 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1204 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
1205 ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
1206 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1207 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1209 ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_1:
1210 ; GFX9-GISEL: ; %bb.0:
1211 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1212 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1213 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1214 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
1215 ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
1216 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1217 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1218 %arg0.ext = fpext half %arg0 to float
1219 %arg1.ext = fpext bfloat %arg1 to float
1220 %arg2.ext = fpext half %arg2 to float
1221 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
1222 %cast = fptrunc float %med3 to half
; Mixed source types: operand 2 is extended from bfloat, operands 0 and 1 from
; half. The fiji and gfx900 SelectionDAG checks convert only v0 and v1 with
; v_cvt_f32_f16 and keep the operation as v_med3_f32.
; NOTE(review): the GlobalISel checks apply f16 instructions to the bfloat
; operand as well; presumably a GlobalISel bfloat limitation -- confirm.
1226 define half @fmed3_f32_fpext_f16_bf16_2(half %arg0, half %arg1, bfloat %arg2) #1 {
1227 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_2:
1228 ; GFX7-SDAG: ; %bb.0:
1229 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1230 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
1231 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1232 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1233 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
1235 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_2:
1236 ; GFX7-GISEL: ; %bb.0:
1237 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1238 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1239 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1240 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
1241 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
1242 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1243 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
1245 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_2:
1246 ; GFX8-SDAG: ; %bb.0:
1247 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1248 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1249 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1250 ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
1251 ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1252 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
1254 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_2:
1255 ; GFX8-GISEL: ; %bb.0:
1256 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1257 ; GFX8-GISEL-NEXT: v_min_f16_e32 v3, v0, v1
1258 ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
1259 ; GFX8-GISEL-NEXT: v_max_f16_e32 v1, v3, v2
1260 ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
1261 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1263 ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_bf16_2:
1264 ; GFX9-SDAG: ; %bb.0:
1265 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1266 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1267 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1268 ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, v2
1269 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1270 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1272 ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_bf16_2:
1273 ; GFX9-GISEL: ; %bb.0:
1274 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1275 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1276 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1277 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
1278 ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
1279 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1280 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1281 %arg0.ext = fpext half %arg0 to float
1282 %arg1.ext = fpext half %arg1 to float
1283 %arg2.ext = fpext bfloat %arg2 to float
1284 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float %arg2.ext)
1285 %cast = fptrunc float %med3 to half
; Operand 0 is the float constant 0x41f0000000000000 (2^32, emitted as
; 0x4f800000 in the checks), which is larger than the greatest finite half
; value (65504), so it has no exact half equivalent. The other two operands are
; extended from half. Every check block keeps the operation as v_med3_f32 with
; the constant materialized into a register.
1289 define half @fmed3_f32_fpext_f16_unrepresentable_k0(half %arg1, half %arg2) #1 {
1290 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
1291 ; GFX7-SDAG: ; %bb.0:
1292 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1293 ; GFX7-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1294 ; GFX7-SDAG-NEXT: v_med3_f32 v0, s4, v0, v1
1295 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1296 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1297 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
1299 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
1300 ; GFX7-GISEL: ; %bb.0:
1301 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1302 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1303 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1304 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1305 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v2, v0, v1
1306 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1307 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
1309 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
1310 ; GFX8-SDAG: ; %bb.0:
1311 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1312 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1313 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1314 ; GFX8-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1315 ; GFX8-SDAG-NEXT: v_med3_f32 v0, s4, v0, v1
1316 ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1317 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
1319 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
1320 ; GFX8-GISEL: ; %bb.0:
1321 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1322 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1323 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1324 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1325 ; GFX8-GISEL-NEXT: v_med3_f32 v0, v2, v0, v1
1326 ; GFX8-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1327 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1329 ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
1330 ; GFX9-SDAG: ; %bb.0:
1331 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1332 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1333 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1334 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1335 ; GFX9-SDAG-NEXT: v_med3_f32 v0, s4, v0, v1
1336 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1337 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1339 ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k0:
1340 ; GFX9-GISEL: ; %bb.0:
1341 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1342 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1343 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1344 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1345 ; GFX9-GISEL-NEXT: v_med3_f32 v0, v2, v0, v1
1346 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1347 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1348 %arg1.ext = fpext half %arg1 to float
1349 %arg2.ext = fpext half %arg2 to float
1350 %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x41f0000000000000, float %arg1.ext, float %arg2.ext)
1351 %cast = fptrunc float %med3 to half
; Operand 1 is the float constant 0x41f0000000000000 (2^32, emitted as
; 0x4f800000 in the checks), which is larger than the greatest finite half
; value (65504), so it has no exact half equivalent. The other two operands are
; extended from half. Every check block keeps the operation as v_med3_f32 with
; the constant materialized into a register.
1355 define half @fmed3_f32_fpext_f16_unrepresentable_k1(half %arg0, half %arg2) #1 {
1356 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
1357 ; GFX7-SDAG: ; %bb.0:
1358 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1359 ; GFX7-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1360 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, s4, v1
1361 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1362 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1363 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
1365 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
1366 ; GFX7-GISEL: ; %bb.0:
1367 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1368 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1369 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1370 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1371 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v2, v1
1372 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1373 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
1375 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
1376 ; GFX8-SDAG: ; %bb.0:
1377 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1378 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1379 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1380 ; GFX8-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1381 ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, s4, v1
1382 ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1383 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
1385 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
1386 ; GFX8-GISEL: ; %bb.0:
1387 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1388 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1389 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1390 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1391 ; GFX8-GISEL-NEXT: v_med3_f32 v0, v0, v2, v1
1392 ; GFX8-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1393 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1395 ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
1396 ; GFX9-SDAG: ; %bb.0:
1397 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1398 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1399 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1400 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1401 ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, s4, v1
1402 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1403 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1405 ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k1:
1406 ; GFX9-GISEL: ; %bb.0:
1407 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1408 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1409 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1410 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1411 ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v2, v1
1412 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1413 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1414 %arg0.ext = fpext half %arg0 to float
1415 %arg2.ext = fpext half %arg2 to float
1416 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float 0x41f0000000000000, float %arg2.ext)
1417 %cast = fptrunc float %med3 to half
; Operand 2 is the float constant 0x41f0000000000000 (2^32, emitted as
; 0x4f800000 in the checks), which is larger than the greatest finite half
; value (65504), so it has no exact half equivalent. The other two operands are
; extended from half. Every check block keeps the operation as v_med3_f32 with
; the constant materialized into a register.
1421 define half @fmed3_f32_fpext_f16_unrepresentable_k2(half %arg0, half %arg1) #1 {
1422 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
1423 ; GFX7-SDAG: ; %bb.0:
1424 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1425 ; GFX7-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1426 ; GFX7-SDAG-NEXT: v_med3_f32 v0, v0, v1, s4
1427 ; GFX7-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1428 ; GFX7-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1429 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
1431 ; GFX7-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
1432 ; GFX7-GISEL: ; %bb.0:
1433 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1434 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1435 ; GFX7-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1436 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1437 ; GFX7-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
1438 ; GFX7-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1439 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
1441 ; GFX8-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
1442 ; GFX8-SDAG: ; %bb.0:
1443 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1444 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1445 ; GFX8-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1446 ; GFX8-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1447 ; GFX8-SDAG-NEXT: v_med3_f32 v0, v0, v1, s4
1448 ; GFX8-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1449 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
1451 ; GFX8-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
1452 ; GFX8-GISEL: ; %bb.0:
1453 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1454 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1455 ; GFX8-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1456 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1457 ; GFX8-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
1458 ; GFX8-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1459 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1461 ; GFX9-SDAG-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
1462 ; GFX9-SDAG: ; %bb.0:
1463 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1464 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1465 ; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
1466 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x4f800000
1467 ; GFX9-SDAG-NEXT: v_med3_f32 v0, v0, v1, s4
1468 ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
1469 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1471 ; GFX9-GISEL-LABEL: fmed3_f32_fpext_f16_unrepresentable_k2:
1472 ; GFX9-GISEL: ; %bb.0:
1473 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1474 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1475 ; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
1476 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x4f800000
1477 ; GFX9-GISEL-NEXT: v_med3_f32 v0, v0, v1, v2
1478 ; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
1479 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1480 %arg0.ext = fpext half %arg0 to float
1481 %arg1.ext = fpext half %arg1 to float
1482 %med3 = call float @llvm.amdgcn.fmed3.f32(float %arg0.ext, float %arg1.ext, float 0x41f0000000000000)
1483 %cast = fptrunc float %med3 to half
; Attribute groups. #0 is attached to the intrinsic declarations at the top of
; the file; #1 is attached to the test functions and is the same list without
; memory(none) (some of the tests store to memory).
1487 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
1488 attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn }
1489 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: