1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
; Clamp of a NaN-free value: min(max(a + 1.0, 2.0), 4.0). Only the fadd carries
; the nnan flag; the maxnum/minnum calls are unflagged. The checks expect the
; v_add_f32 result to be the first source of a single v_med3_f32 with the
; constants 2.0 and 4.0.
6 ; GCN-LABEL: {{^}}v_test_nnan_input_fmed3_r_i_i_f32:
7 ; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, v{{[0-9]+}}
8 ; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], 2.0, 4.0
9 define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
; Per-workitem element of %aptr (input) and %out (output).
10 %tid = call i32 @llvm.amdgcn.workitem.id.x()
11 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
12 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
13 %a = load float, float addrspace(1)* %gep0
; The nnan flag on this add is the only no-NaN information in the body.
14 %a.add = fadd nnan float %a, 1.0
15 %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
16 %med = call float @llvm.minnum.f32(float %max, float 4.0)
18 store float %med, float addrspace(1)* %outgep
; min(max(a + 1.0, 2.0), 4.0) with an nnan-flagged fadd as the clamped value.
; Only the resulting v_med3_f32 is checked: any VGPR as the first source,
; then the constants 2.0 and 4.0.
22 ; GCN-LABEL: {{^}}v_test_fmed3_nnan_r_i_i_f32:
23 ; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
24 define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
25 %tid = call i32 @llvm.amdgcn.workitem.id.x()
26 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
27 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
28 %a = load float, float addrspace(1)* %gep0
29 %a.add = fadd nnan float %a, 1.0
; Lower bound 2.0 is applied first, upper bound 4.0 second.
31 %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
32 %med = call float @llvm.minnum.f32(float %max, float 4.0)
34 store float %med, float addrspace(1)* %outgep
; Same clamp as the plain r_i_i test, but the constant is the FIRST operand of
; both the maxnum and the minnum call. The expected v_med3_f32 is unchanged
; (VGPR, 2.0, 4.0).
38 ; GCN-LABEL: {{^}}v_test_fmed3_nnan_r_i_i_commute0_f32:
39 ; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
40 define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
41 %tid = call i32 @llvm.amdgcn.workitem.id.x()
42 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
43 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
44 %a = load float, float addrspace(1)* %gep0
45 %a.add = fadd nnan float %a, 1.0
; Both intrinsic calls take the constant on the left-hand side.
47 %max = call float @llvm.maxnum.f32(float 2.0, float %a.add)
48 %med = call float @llvm.minnum.f32(float 4.0, float %max)
50 store float %med, float addrspace(1)* %outgep
; Clamp variant where only the outer minnum has its operands swapped (constant
; 4.0 first); the inner maxnum keeps the value first. The expected v_med3_f32
; is unchanged (VGPR, 2.0, 4.0).
54 ; GCN-LABEL: {{^}}v_test_fmed3_nnan_r_i_i_commute1_f32:
55 ; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
56 define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
57 %tid = call i32 @llvm.amdgcn.workitem.id.x()
58 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
59 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
60 %a = load float, float addrspace(1)* %gep0
61 %a.add = fadd nnan float %a, 1.0
63 %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
; Only this call is commuted relative to the plain r_i_i test.
64 %med = call float @llvm.minnum.f32(float 4.0, float %max)
66 store float %med, float addrspace(1)* %outgep
; Negative test: the constants are in the wrong order for a clamp. The max uses
; 4.0 and the min uses 2.0, so the "lower" bound is greater than the "upper"
; bound. The checks expect separate v_max_f32 / v_min_f32 instructions rather
; than a v_med3_f32.
70 ; GCN-LABEL: {{^}}v_test_fmed3_nnan_r_i_i_constant_order_f32:
71 ; GCN: v_max_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
72 ; GCN: v_min_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
73 define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
74 %tid = call i32 @llvm.amdgcn.workitem.id.x()
75 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
76 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
77 %a = load float, float addrspace(1)* %gep0
78 %a.add = fadd nnan float %a, 1.0
; max against the larger constant, then min against the smaller one.
80 %max = call float @llvm.maxnum.f32(float %a.add, float 4.0)
81 %med = call float @llvm.minnum.f32(float %max, float 2.0)
83 store float %med, float addrspace(1)* %outgep
; Negative test: the intermediate %max has a second use (it is stored as well
; as feeding the minnum). The checks expect separate v_max_f32 / v_min_f32
; instructions rather than a v_med3_f32.
87 ; GCN-LABEL: {{^}}v_test_fmed3_nnan_r_i_i_multi_use_f32:
88 ; GCN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
89 ; GCN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
90 define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
91 %tid = call i32 @llvm.amdgcn.workitem.id.x()
92 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
93 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
94 %a = load float, float addrspace(1)* %gep0
95 %a.add = fadd nnan float %a, 1.0
97 %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
98 %med = call float @llvm.minnum.f32(float %max, float 4.0)
; Both stores are volatile and target the same address, so neither %med nor
; %max can be dropped.
100 store volatile float %med, float addrspace(1)* %outgep
101 store volatile float %max, float addrspace(1)* %outgep
; Double-precision version of the clamp min(max(a + 1.0, 2.0), 4.0). The checks
; expect a v_max_f64 followed by a v_min_f64 on 64-bit VGPR pairs, i.e. no
; three-operand median instruction is expected for f64.
105 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f64:
106 ; GCN: v_max_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 2.0
107 ; GCN: v_min_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 4.0
108 define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #1 {
109 %tid = call i32 @llvm.amdgcn.workitem.id.x()
110 %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
111 %outgep = getelementptr double, double addrspace(1)* %out, i32 %tid
112 %a = load double, double addrspace(1)* %gep0
113 %a.add = fadd nnan double %a, 1.0
115 %max = call double @llvm.maxnum.f64(double %a.add, double 2.0)
116 %med = call double @llvm.minnum.f64(double %max, double 4.0)
118 store double %med, double addrspace(1)* %outgep
; Clamp min(max(a, 2.0), 4.0) applied directly to the loaded value; no
; instruction in the body carries an nnan flag. The function uses attribute
; group #2 instead of #1 (the group is defined outside this part of the file;
; presumably it enables no-NaNs FP math - confirm). The checks expect a
; v_med3_f32 with the constants 2.0 and 4.0.
122 ; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_no_nans_f32:
123 ; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
124 define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
125 %tid = call i32 @llvm.amdgcn.workitem.id.x()
126 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
127 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
128 %a = load float, float addrspace(1)* %gep0
130 %max = call float @llvm.maxnum.f32(float %a, float 2.0)
131 %med = call float @llvm.minnum.f32(float %max, float 4.0)
133 store float %med, float addrspace(1)* %outgep
; Clamp written with fcmp + select instead of the maxnum/minnum intrinsics.
; The clamped value is an nnan-flagged fadd. The checks expect the
; compare/select pairs to become a v_med3_f32 with the constants 2.0 and 4.0.
137 ; GCN-LABEL: {{^}}v_test_legacy_fmed3_r_i_i_f32:
138 ; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
139 define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
140 %tid = call i32 @llvm.amdgcn.workitem.id.x()
141 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
142 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
143 %a = load float, float addrspace(1)* %gep0
144 %a.nnan = fadd nnan float %a, 1.0
; Select-based max against 2.0: pick 2.0 when the value is <= 2.0 (or unordered).
147 %cmp0 = fcmp ule float %a.nnan, 2.0
148 %max = select i1 %cmp0, float 2.0, float %a.nnan
; Select-based min against 4.0: pick 4.0 when the value is >= 4.0 (or unordered).
151 %cmp1 = fcmp uge float %max, 4.0
152 %med = select i1 %cmp1, float 4.0, float %max
154 store float %med, float addrspace(1)* %outgep
; Median-of-three pattern max(min(x, y), min(max(x, y), z)) with x = -a,
; y = b, z = c, where a, b, c come from three volatile loads. The checks
; expect one v_med3_f32 with the negation folded into the first source
; operand as a source modifier.
158 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0:
159 ; GCN: {{buffer_|flat_|global_}}load_dword [[A:v[0-9]+]]
160 ; GCN: {{buffer_|flat_|global_}}load_dword [[B:v[0-9]+]]
161 ; GCN: {{buffer_|flat_|global_}}load_dword [[C:v[0-9]+]]
162 ; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], [[B]], [[C]]
163 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
164 %tid = call i32 @llvm.amdgcn.workitem.id.x()
165 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
166 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
167 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
168 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
169 %a = load volatile float, float addrspace(1)* %gep0
170 %b = load volatile float, float addrspace(1)* %gep1
171 %c = load volatile float, float addrspace(1)* %gep2
; Negation is spelled as a subtraction from -0.0.
172 %a.fneg = fsub float -0.0, %a
173 %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b)
174 %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b)
175 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
176 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
177 store float %med3, float addrspace(1)* %outgep
; Median-of-three pattern max(min(x, y), min(max(x, y), z)) with x = a,
; y = -b, z = c (three volatile loads). The checks expect one v_med3_f32
; with the negation folded into the second source operand.
181 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod1:
182 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
183 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
184 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
185 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], -[[B]], [[C]]
186 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
187 %tid = call i32 @llvm.amdgcn.workitem.id.x()
188 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
189 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
190 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
191 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
192 %a = load volatile float, float addrspace(1)* %gep0
193 %b = load volatile float, float addrspace(1)* %gep1
194 %c = load volatile float, float addrspace(1)* %gep2
; Negation is spelled as a subtraction from -0.0.
195 %b.fneg = fsub float -0.0, %b
196 %tmp0 = call float @llvm.minnum.f32(float %a, float %b.fneg)
197 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b.fneg)
198 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
199 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
200 store float %med3, float addrspace(1)* %outgep
; Median-of-three pattern max(min(x, y), min(max(x, y), z)) with x = a,
; y = b, z = -c (three volatile loads). The checks expect one v_med3_f32
; with the negation folded into the third source operand.
204 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod2:
205 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
206 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
207 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
208 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], -[[C]]
209 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
210 %tid = call i32 @llvm.amdgcn.workitem.id.x()
211 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
212 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
213 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
214 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
215 %a = load volatile float, float addrspace(1)* %gep0
216 %b = load volatile float, float addrspace(1)* %gep1
217 %c = load volatile float, float addrspace(1)* %gep2
; Negation is spelled as a subtraction from -0.0.
218 %c.fneg = fsub float -0.0, %c
219 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
220 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
221 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fneg)
222 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
223 store float %med3, float addrspace(1)* %outgep
; Median-of-three pattern max(min(x, y), min(max(x, y), z)) with a different
; source modifier on every operand: x = -a, y = |b|, z = -|c|. The checks
; expect one v_med3_f32 carrying all three modifiers on its sources.
227 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod012:
228 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
229 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
230 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
231 ; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], |[[B]]|, -|[[C]]|
232 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
233 %tid = call i32 @llvm.amdgcn.workitem.id.x()
234 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
235 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
236 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
237 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
238 %a = load volatile float, float addrspace(1)* %gep0
239 %b = load volatile float, float addrspace(1)* %gep1
240 %c = load volatile float, float addrspace(1)* %gep2
; Build the modified inputs: neg(a), abs(b), neg(abs(c)).
242 %a.fneg = fsub float -0.0, %a
243 %b.fabs = call float @llvm.fabs.f32(float %b)
244 %c.fabs = call float @llvm.fabs.f32(float %c)
245 %c.fabs.fneg = fsub float -0.0, %c.fabs
247 %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b.fabs)
248 %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b.fabs)
249 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
250 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
252 store float %med3, float addrspace(1)* %outgep
; Median-of-three pattern max(min(x, y), min(max(x, y), z)) where every input
; is negated-absolute: x = -|a|, y = -|b|, z = -|c|. The checks expect one
; v_med3_f32 with the combined neg+abs modifier on all three sources.
256 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_negabs012:
257 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
258 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
259 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
260 ; GCN: v_med3_f32 v{{[0-9]+}}, -|[[A]]|, -|[[B]]|, -|[[C]]|
261 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
262 %tid = call i32 @llvm.amdgcn.workitem.id.x()
263 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
264 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
265 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
266 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
267 %a = load volatile float, float addrspace(1)* %gep0
268 %b = load volatile float, float addrspace(1)* %gep1
269 %c = load volatile float, float addrspace(1)* %gep2
; Each input goes through fabs and is then negated via fsub from -0.0.
271 %a.fabs = call float @llvm.fabs.f32(float %a)
272 %a.fabs.fneg = fsub float -0.0, %a.fabs
273 %b.fabs = call float @llvm.fabs.f32(float %b)
274 %b.fabs.fneg = fsub float -0.0, %b.fabs
275 %c.fabs = call float @llvm.fabs.f32(float %c)
276 %c.fabs.fneg = fsub float -0.0, %c.fabs
278 %tmp0 = call float @llvm.minnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
279 %tmp1 = call float @llvm.maxnum.f32(float %a.fabs.fneg, float %b.fabs.fneg)
280 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg)
281 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
283 store float %med3, float addrspace(1)* %outgep
; Median-of-three pattern max(min(x, y), min(max(x, y), z)) under attribute
; group #1, where each input is made NaN-free by an nnan-flagged fadd
; (a + 1.0, b + 2.0, c + 4.0). The three v_add_f32 results may appear in any
; order (DAG checks) and must be the sources of a single v_med3_f32.
287 ; GCN-LABEL: {{^}}v_nnan_inputs_med3_f32_pat0:
288 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
289 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
290 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
291 ; GCN-DAG: v_add_f32_e32 [[A_ADD:v[0-9]+]], 1.0, [[A]]
292 ; GCN-DAG: v_add_f32_e32 [[B_ADD:v[0-9]+]], 2.0, [[B]]
293 ; GCN-DAG: v_add_f32_e32 [[C_ADD:v[0-9]+]], 4.0, [[C]]
294 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A_ADD]], [[B_ADD]], [[C_ADD]]
295 define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
296 %tid = call i32 @llvm.amdgcn.workitem.id.x()
297 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
298 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
299 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
300 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
301 %a = load volatile float, float addrspace(1)* %gep0
302 %b = load volatile float, float addrspace(1)* %gep1
303 %c = load volatile float, float addrspace(1)* %gep2
; The nnan flags on these adds are the only no-NaN information in the body.
305 %a.nnan = fadd nnan float %a, 1.0
306 %b.nnan = fadd nnan float %b, 2.0
307 %c.nnan = fadd nnan float %c, 4.0
309 %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
310 %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
311 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
312 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
313 store float %med3, float addrspace(1)* %outgep
319 ; 0: max(min(x, y), min(max(x, y), z))
320 ; 1: max(min(x, y), min(max(y, x), z))
321 ; 2: max(min(x, y), min(z, max(x, y)))
322 ; 3: max(min(x, y), min(z, max(y, x)))
323 ; 4: max(min(y, x), min(max(x, y), z))
324 ; 5: max(min(y, x), min(max(y, x), z))
325 ; 6: max(min(y, x), min(z, max(x, y)))
326 ; 7: max(min(y, x), min(z, max(y, x)))
328 ; + commute outermost max
; Pattern 0 of the operand-order table: max(min(x, y), min(max(x, y), z)),
; with x = a, y = b, z = c taken from three volatile loads. The checks expect
; a single v_med3_f32 with sources in the order A, B, C.
330 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0:
331 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
332 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
333 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
334 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
335 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
336 %tid = call i32 @llvm.amdgcn.workitem.id.x()
337 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
338 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
339 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
340 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
341 %a = load volatile float, float addrspace(1)* %gep0
342 %b = load volatile float, float addrspace(1)* %gep1
343 %c = load volatile float, float addrspace(1)* %gep2
344 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
345 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
346 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
347 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
348 store float %med3, float addrspace(1)* %outgep
; Pattern 1 of the operand-order table: max(min(x, y), min(max(y, x), z)),
; i.e. only the inner maxnum has its operands swapped. The checks expect a
; single v_med3_f32 with sources in the order A, B, C.
352 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat1:
353 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
354 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
355 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
356 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
357 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
358 %tid = call i32 @llvm.amdgcn.workitem.id.x()
359 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
360 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
361 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
362 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
363 %a = load volatile float, float addrspace(1)* %gep0
364 %b = load volatile float, float addrspace(1)* %gep1
365 %c = load volatile float, float addrspace(1)* %gep2
366 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
367 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
368 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
369 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
370 store float %med3, float addrspace(1)* %outgep
; Pattern 2 of the operand-order table: max(min(x, y), min(z, max(x, y))),
; i.e. the second minnum takes z first. The checks expect a single
; v_med3_f32 with sources in the order A, B, C.
374 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat2:
375 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
376 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
377 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
378 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
379 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
380 %tid = call i32 @llvm.amdgcn.workitem.id.x()
381 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
382 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
383 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
384 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
385 %a = load volatile float, float addrspace(1)* %gep0
386 %b = load volatile float, float addrspace(1)* %gep1
387 %c = load volatile float, float addrspace(1)* %gep2
388 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
389 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
390 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
391 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
392 store float %med3, float addrspace(1)* %outgep
; Pattern 3 of the operand-order table: max(min(x, y), min(z, max(y, x))),
; i.e. the inner maxnum is swapped and the second minnum takes z first. The
; checks expect a single v_med3_f32 with sources in the order A, B, C.
396 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat3:
397 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
398 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
399 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
400 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
401 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
402 %tid = call i32 @llvm.amdgcn.workitem.id.x()
403 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
404 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
405 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
406 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
407 %a = load volatile float, float addrspace(1)* %gep0
408 %b = load volatile float, float addrspace(1)* %gep1
409 %c = load volatile float, float addrspace(1)* %gep2
410 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
411 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
412 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
413 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
414 store float %med3, float addrspace(1)* %outgep
; Pattern 4 of the operand-order table: max(min(y, x), min(max(x, y), z)),
; with x = a, y = b, z = c taken from three volatile loads. Only the first
; minnum has its operands swapped; the inner maxnum is (x, y) and the second
; minnum takes the max first. The IR previously spelled pattern 7 here
; (max(b, a) and min(c, max)), which duplicated the pat7 test and left
; pattern 4 uncovered. The checks expect a single v_med3_f32 with sources in
; the order B, A, C, matching the other patterns whose first minnum is
; (b, a) with an uncommuted outer maxnum.
418 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat4:
419 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
420 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
421 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
422 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
423 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
424 %tid = call i32 @llvm.amdgcn.workitem.id.x()
425 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
426 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
427 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
428 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
429 %a = load volatile float, float addrspace(1)* %gep0
430 %b = load volatile float, float addrspace(1)* %gep1
431 %c = load volatile float, float addrspace(1)* %gep2
; min(y, x)
432 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
; max(x, y)
433 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
; min(max(x, y), z)
434 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
435 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
436 store float %med3, float addrspace(1)* %outgep
; Pattern 5 of the operand-order table: max(min(y, x), min(max(y, x), z)),
; i.e. both the first minnum and the inner maxnum take (b, a). The checks
; expect a single v_med3_f32 with sources in the order B, A, C.
440 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat5:
441 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
442 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
443 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
444 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
445 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
446 %tid = call i32 @llvm.amdgcn.workitem.id.x()
447 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
448 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
449 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
450 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
451 %a = load volatile float, float addrspace(1)* %gep0
452 %b = load volatile float, float addrspace(1)* %gep1
453 %c = load volatile float, float addrspace(1)* %gep2
454 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
455 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
456 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
457 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
458 store float %med3, float addrspace(1)* %outgep
; Pattern 6 of the operand-order table: max(min(y, x), min(z, max(x, y))),
; i.e. the first minnum takes (b, a) and the second minnum takes z first.
; The checks expect a single v_med3_f32 with sources in the order B, A, C.
462 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat6:
463 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
464 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
465 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
466 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
467 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
468 %tid = call i32 @llvm.amdgcn.workitem.id.x()
469 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
470 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
471 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
472 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
473 %a = load volatile float, float addrspace(1)* %gep0
474 %b = load volatile float, float addrspace(1)* %gep1
475 %c = load volatile float, float addrspace(1)* %gep2
476 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
477 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
478 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
479 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
480 store float %med3, float addrspace(1)* %outgep
; Pattern 7 of the operand-order table: max(min(y, x), min(z, max(y, x))),
; i.e. every inner call is swapped relative to pattern 0. The checks expect a
; single v_med3_f32 with sources in the order B, A, C.
484 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat7:
485 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
486 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
487 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
488 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
489 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
490 %tid = call i32 @llvm.amdgcn.workitem.id.x()
491 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
492 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
493 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
494 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
495 %a = load volatile float, float addrspace(1)* %gep0
496 %b = load volatile float, float addrspace(1)* %gep1
497 %c = load volatile float, float addrspace(1)* %gep2
498 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
499 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
500 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
501 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
502 store float %med3, float addrspace(1)* %outgep
; Pattern 0 with the outermost maxnum commuted:
; max(min(max(x, y), z), min(x, y)). The checks expect a single v_med3_f32
; with sources in the order A, B, C.
506 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat8:
507 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
508 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
509 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
510 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
511 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
512 %tid = call i32 @llvm.amdgcn.workitem.id.x()
513 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
514 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
515 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
516 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
517 %a = load volatile float, float addrspace(1)* %gep0
518 %b = load volatile float, float addrspace(1)* %gep1
519 %c = load volatile float, float addrspace(1)* %gep2
520 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
521 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
522 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
; Outer max takes (%tmp2, %tmp0) instead of (%tmp0, %tmp2).
523 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
524 store float %med3, float addrspace(1)* %outgep
; Pattern 1 with the outermost maxnum commuted:
; max(min(max(y, x), z), min(x, y)). The checks expect a single v_med3_f32
; with sources in the order B, A, C.
528 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat9:
529 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
530 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
531 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
532 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
533 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
534 %tid = call i32 @llvm.amdgcn.workitem.id.x()
535 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
536 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
537 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
538 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
539 %a = load volatile float, float addrspace(1)* %gep0
540 %b = load volatile float, float addrspace(1)* %gep1
541 %c = load volatile float, float addrspace(1)* %gep2
542 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
543 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
544 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
; Outer max takes (%tmp2, %tmp0) instead of (%tmp0, %tmp2).
545 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
546 store float %med3, float addrspace(1)* %outgep
; Pattern 2 with the outermost maxnum commuted:
; max(min(z, max(x, y)), min(x, y)). The checks expect a single v_med3_f32
; with sources in the order A, B, C.
550 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat10:
551 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
552 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
553 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
554 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
555 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
556 %tid = call i32 @llvm.amdgcn.workitem.id.x()
557 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
558 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
559 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
560 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
561 %a = load volatile float, float addrspace(1)* %gep0
562 %b = load volatile float, float addrspace(1)* %gep1
563 %c = load volatile float, float addrspace(1)* %gep2
564 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
565 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
566 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
; Outer max takes (%tmp2, %tmp0) instead of (%tmp0, %tmp2).
567 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
568 store float %med3, float addrspace(1)* %outgep
; Pattern 3 with the outermost maxnum commuted:
; max(min(z, max(y, x)), min(x, y)). The checks expect a single v_med3_f32
; with sources in the order B, A, C.
572 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat11:
573 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
574 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
575 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
576 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
577 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
578 %tid = call i32 @llvm.amdgcn.workitem.id.x()
579 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
580 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
581 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
582 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
583 %a = load volatile float, float addrspace(1)* %gep0
584 %b = load volatile float, float addrspace(1)* %gep1
585 %c = load volatile float, float addrspace(1)* %gep2
586 %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
587 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
588 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
; Outer max takes (%tmp2, %tmp0) instead of (%tmp0, %tmp2).
589 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
590 store float %med3, float addrspace(1)* %outgep
; Median-of-three pattern with the outermost maxnum commuted:
; max(min(z, max(y, x)), min(y, x)), with x = a, y = b, z = c. The checks
; expect a single v_med3_f32 with sources in the order B, A, C.
; NOTE(review): by its position in the series this test would be pattern 4
; of the operand-order table with the outer max commuted, i.e.
; max(min(max(x, y), z), min(y, x)). The IR below is instead pattern 7
; commuted (max(b, a) and min(c, max)). Confirm whether that is intended;
; switching to pattern 4 would probably also change the expected operand
; order of the v_med3_f32.
594 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat12:
595 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
596 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
597 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
598 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
599 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
600 %tid = call i32 @llvm.amdgcn.workitem.id.x()
601 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
602 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
603 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
604 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
605 %a = load volatile float, float addrspace(1)* %gep0
606 %b = load volatile float, float addrspace(1)* %gep1
607 %c = load volatile float, float addrspace(1)* %gep2
608 %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
609 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
610 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
; Outer max takes (%tmp2, %tmp0) instead of (%tmp0, %tmp2).
611 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
612 store float %med3, float addrspace(1)* %outgep
; Commuted-outer-max form: max(min(max(b, a), c), min(b, a)).
; The kernel carries attribute group #2 ("no-nans-fp-math"="true"); the checks
; below bind A, B, C to the three loads in order and expect one v_med3_f32.
616 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat13:
617 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
618 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
619 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
620 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
621 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
622 %tid = call i32 @llvm.amdgcn.workitem.id.x()
623 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
624 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
625 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
626 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
627 %a = load volatile float, float addrspace(1)* %gep0
628 %b = load volatile float, float addrspace(1)* %gep1
629 %c = load volatile float, float addrspace(1)* %gep2
630 %tmp0 = call float @llvm.minnum.f32(float %b, float %a) ; min(b, a)
631 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) ; max(b, a)
632 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) ; min(max(b, a), c)
633 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) ; outer max, min-of-max operand first
634 store float %med3, float addrspace(1)* %outgep
; Commuted-outer-max form: max(min(c, max(a, b)), min(b, a)).
; The kernel carries attribute group #2 ("no-nans-fp-math"="true"); the checks
; below bind A, B, C to the three loads in order and expect one v_med3_f32.
638 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat14:
639 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
640 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
641 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
642 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]]
643 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
644 %tid = call i32 @llvm.amdgcn.workitem.id.x()
645 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
646 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
647 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
648 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
649 %a = load volatile float, float addrspace(1)* %gep0
650 %b = load volatile float, float addrspace(1)* %gep1
651 %c = load volatile float, float addrspace(1)* %gep2
652 %tmp0 = call float @llvm.minnum.f32(float %b, float %a) ; min(b, a)
653 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) ; max(a, b)
654 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) ; min(c, max(a, b))
655 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) ; outer max, min-of-max operand first
656 store float %med3, float addrspace(1)* %outgep
; Commuted-outer-max form: max(min(c, max(b, a)), min(b, a)).
; The kernel carries attribute group #2 ("no-nans-fp-math"="true"); the checks
; below bind A, B, C to the three loads in order and expect one v_med3_f32.
660 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat15:
661 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
662 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
663 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
664 ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]]
665 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
666 %tid = call i32 @llvm.amdgcn.workitem.id.x()
667 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
668 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
669 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
670 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
671 %a = load volatile float, float addrspace(1)* %gep0
672 %b = load volatile float, float addrspace(1)* %gep1
673 %c = load volatile float, float addrspace(1)* %gep2
674 %tmp0 = call float @llvm.minnum.f32(float %b, float %a) ; min(b, a)
675 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) ; max(b, a)
676 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) ; min(c, max(b, a))
677 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) ; outer max, min-of-max operand first
678 store float %med3, float addrspace(1)* %outgep
682 ; ---------------------------------------------------------------------
684 ; ---------------------------------------------------------------------
; max(min(a, b), min(max(a, b), c)) where the inner min(a, b) has a second user
; (a volatile store). The kernel carries attribute group #1
; ("no-nans-fp-math"="false").
; NOTE(review): only the label check is visible for this kernel here; presumably
; the expected instructions are pinned by lines outside this view - confirm.
686 ; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use0:
691 define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
692 %tid = call i32 @llvm.amdgcn.workitem.id.x()
693 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
694 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
695 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
696 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
697 %a = load volatile float, float addrspace(1)* %gep0
698 %b = load volatile float, float addrspace(1)* %gep1
699 %c = load volatile float, float addrspace(1)* %gep2
700 %tmp0 = call float @llvm.minnum.f32(float %a, float %b) ; min(a, b)
701 store volatile float %tmp0, float addrspace(1)* undef ; second user of %tmp0
702 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) ; max(a, b)
703 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) ; min(max(a, b), c)
704 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) ; outer max
705 store float %med3, float addrspace(1)* %outgep
; max(min(a, b), min(max(a, b), c)) where the inner max(a, b) has a second user
; (a volatile store). The kernel carries attribute group #1
; ("no-nans-fp-math"="false").
; NOTE(review): only the label check is visible for this kernel here; presumably
; the expected instructions are pinned by lines outside this view - confirm.
709 ; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use1:
710 define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
711 %tid = call i32 @llvm.amdgcn.workitem.id.x()
712 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
713 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
714 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
715 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
716 %a = load volatile float, float addrspace(1)* %gep0
717 %b = load volatile float, float addrspace(1)* %gep1
718 %c = load volatile float, float addrspace(1)* %gep2
719 %tmp0 = call float @llvm.minnum.f32(float %a, float %b) ; min(a, b)
720 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) ; max(a, b)
721 store volatile float %tmp1, float addrspace(1)* undef ; second user of %tmp1
722 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) ; min(max(a, b), c)
723 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) ; outer max
724 store float %med3, float addrspace(1)* %outgep
; max(min(a, b), min(max(a, b), c)) where the min-of-max value has a second user
; (a volatile store). The kernel carries attribute group #1
; ("no-nans-fp-math"="false").
; NOTE(review): only the label check is visible for this kernel here; presumably
; the expected instructions are pinned by lines outside this view - confirm.
728 ; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0_multi_use2:
729 define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
730 %tid = call i32 @llvm.amdgcn.workitem.id.x()
731 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
732 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
733 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
734 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
735 %a = load volatile float, float addrspace(1)* %gep0
736 %b = load volatile float, float addrspace(1)* %gep1
737 %c = load volatile float, float addrspace(1)* %gep2
738 %tmp0 = call float @llvm.minnum.f32(float %a, float %b) ; min(a, b)
739 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) ; max(a, b)
740 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) ; min(max(a, b), c)
741 store volatile float %tmp2, float addrspace(1)* undef ; second user of %tmp2
742 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) ; outer max
743 store float %med3, float addrspace(1)* %outgep
; max(min(a, b), min(max(a, b), c)) on raw loaded values: no instruction has the
; nnan flag and the kernel carries attribute group #1
; ("no-nans-fp-math"="false").
; NOTE(review): only the label check is visible for this kernel here; presumably
; the expected instructions are pinned by lines outside this view - confirm.
748 ; GCN-LABEL: {{^}}v_test_safe_med3_f32_pat0:
749 define amdgpu_kernel void @v_test_safe_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
750 %tid = call i32 @llvm.amdgcn.workitem.id.x()
751 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
752 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
753 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
754 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
755 %a = load volatile float, float addrspace(1)* %gep0
756 %b = load volatile float, float addrspace(1)* %gep1
757 %c = load volatile float, float addrspace(1)* %gep2
758 %tmp0 = call float @llvm.minnum.f32(float %a, float %b) ; min(a, b)
759 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) ; max(a, b)
760 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) ; min(max(a, b), c)
761 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) ; outer max
762 store float %med3, float addrspace(1)* %outgep
; max(min(a', b'), min(max(a', b'), c')) where a', b', c' are fadd results.
; Only the fadd producing %a.nnan lacks the nnan flag (despite its name); the
; kernel carries attribute group #1 ("no-nans-fp-math"="false").
; NOTE(review): only the label check is visible for this kernel here.
766 ; GCN-LABEL: {{^}}v_nnan_inputs_missing0_med3_f32_pat0:
767 define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
768 %tid = call i32 @llvm.amdgcn.workitem.id.x()
769 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
770 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
771 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
772 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
773 %a = load volatile float, float addrspace(1)* %gep0
774 %b = load volatile float, float addrspace(1)* %gep1
775 %c = load volatile float, float addrspace(1)* %gep2
777 %a.nnan = fadd float %a, 1.0 ; deliberately no nnan flag on operand 0
778 %b.nnan = fadd nnan float %b, 2.0
779 %c.nnan = fadd nnan float %c, 4.0
781 %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
782 %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
783 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
784 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
785 store float %med3, float addrspace(1)* %outgep
; max(min(a', b'), min(max(a', b'), c')) where a', b', c' are fadd results.
; Only the fadd producing %b.nnan lacks the nnan flag (despite its name); the
; kernel carries attribute group #1 ("no-nans-fp-math"="false").
; NOTE(review): only the label check is visible for this kernel here.
789 ; GCN-LABEL: {{^}}v_nnan_inputs_missing1_med3_f32_pat0:
790 define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
791 %tid = call i32 @llvm.amdgcn.workitem.id.x()
792 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
793 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
794 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
795 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
796 %a = load volatile float, float addrspace(1)* %gep0
797 %b = load volatile float, float addrspace(1)* %gep1
798 %c = load volatile float, float addrspace(1)* %gep2
800 %a.nnan = fadd nnan float %a, 1.0
801 %b.nnan = fadd float %b, 2.0 ; deliberately no nnan flag on operand 1
802 %c.nnan = fadd nnan float %c, 4.0
804 %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
805 %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
806 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
807 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
808 store float %med3, float addrspace(1)* %outgep
; max(min(a', b'), min(max(a', b'), c')) where a', b', c' are fadd results.
; Only the fadd producing %c.nnan lacks the nnan flag (despite its name); the
; kernel carries attribute group #1 ("no-nans-fp-math"="false").
; NOTE(review): only the label check is visible for this kernel here.
812 ; GCN-LABEL: {{^}}v_nnan_inputs_missing2_med3_f32_pat0:
813 define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 {
814 %tid = call i32 @llvm.amdgcn.workitem.id.x()
815 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
816 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
817 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
818 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
819 %a = load volatile float, float addrspace(1)* %gep0
820 %b = load volatile float, float addrspace(1)* %gep1
821 %c = load volatile float, float addrspace(1)* %gep2
823 %a.nnan = fadd nnan float %a, 1.0
824 %b.nnan = fadd nnan float %b, 2.0
825 %c.nnan = fadd float %c, 4.0 ; deliberately no nnan flag on operand 2
827 %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan)
828 %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan)
829 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan)
830 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
831 store float %med3, float addrspace(1)* %outgep
; max(min(-a, b), min(max(a, b), c)): the inner min takes the negated %a while
; the inner max takes plain %a, so the two subtrees do not share operand 0.
; The kernel carries attribute group #2 ("no-nans-fp-math"="true").
; NOTE(review): only the label and load checks are visible for this kernel
; here; the expected arithmetic instructions are not shown in this view.
835 ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch:
836 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
837 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
838 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
843 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
844 %tid = call i32 @llvm.amdgcn.workitem.id.x()
845 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
846 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
847 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
848 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
849 %a = load volatile float, float addrspace(1)* %gep0
850 %b = load volatile float, float addrspace(1)* %gep1
851 %c = load volatile float, float addrspace(1)* %gep2
852 %a.fneg = fsub float -0.0, %a ; negation of %a written as -0.0 - a
853 %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b) ; min(-a, b)
854 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) ; max(a, b), un-negated %a
855 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) ; min(max(a, b), c)
856 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) ; outer max
857 store float %med3, float addrspace(1)* %outgep
861 ; A simple min and max is not sufficient
; This kernel computes only min(max(a, b), c) with three variable operands and
; no outer max. The checks below expect a separate v_max_f32 followed by a
; v_min_f32, even though the kernel carries attribute group #2
; ("no-nans-fp-math"="true").
862 ; GCN-LABEL: {{^}}v_test_global_nnans_min_max_f32:
863 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
864 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
865 ; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]]
866 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], [[A]], [[B]]
867 ; GCN: v_min_f32_e32 v{{[0-9]+}}, [[MAX]], [[C]]
868 define amdgpu_kernel void @v_test_global_nnans_min_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 {
869 %tid = call i32 @llvm.amdgcn.workitem.id.x()
870 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
871 %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid
872 %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid
873 %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
874 %a = load volatile float, float addrspace(1)* %gep0
875 %b = load volatile float, float addrspace(1)* %gep1
876 %c = load volatile float, float addrspace(1)* %gep2
877 %max = call float @llvm.maxnum.f32(float %a, float %b) ; max(a, b)
878 %minmax = call float @llvm.minnum.f32(float %max, float %c) ; min(max(a, b), c)
879 store float %minmax, float addrspace(1)* %outgep
; Half-precision clamp: min(max(a + 1.0, 2.0), 4.0), with the nnan flag on the
; fadd only. Expected output differs per run line, per the checks below:
; the SI checks expect an f32 add and v_med3_f32, the VI checks expect separate
; f16 add/max/min, and the GFX9 checks expect an f16 add and v_med3_f16.
883 ; GCN-LABEL: {{^}}v_test_nnan_input_fmed3_r_i_i_f16:
885 ; SI: v_add_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
886 ; SI: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
889 ; VI: v_add_f16_e32 v{{[0-9]+}}, 1.0
890 ; VI: v_max_f16_e32 v{{[0-9]+}}, 2.0
891 ; VI: v_min_f16_e32 v{{[0-9]+}}, 4.0
893 ; GFX9: v_add_f16_e32 [[ADD:v[0-9]+]], 1.0
894 ; GFX9: v_med3_f16 v{{[0-9]+}}, [[ADD]], 2.0, 4.0
895 define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #1 {
896 %tid = call i32 @llvm.amdgcn.workitem.id.x()
897 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
898 %outgep = getelementptr half, half addrspace(1)* %out, i32 %tid
899 %a = load half, half addrspace(1)* %gep0
900 %a.add = fadd nnan half %a, 1.0 ; nnan-flagged input to the clamp
901 %max = call half @llvm.maxnum.f16(half %a.add, half 2.0) ; lower bound 2.0
902 %med = call half @llvm.minnum.f16(half %max, half 4.0) ; upper bound 4.0
904 store half %med, half addrspace(1)* %outgep
; Half-precision max(min(a', b'), min(max(a', b'), c')) where a', b', c' are all
; produced by nnan-flagged fadds; the kernel itself carries attribute group #1
; ("no-nans-fp-math"="false"). The visible GFX9 check expects one v_med3_f16 on
; the three add results; the visible SI check expects a v_cvt_f16_f32.
908 ; GCN-LABEL: {{^}}v_nnan_inputs_med3_f16_pat0:
909 ; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
910 ; GCN: {{buffer|flat|global}}_load_ushort [[B:v[0-9]+]]
911 ; GCN: {{buffer|flat|global}}_load_ushort [[C:v[0-9]+]]
919 ; SI: v_cvt_f16_f32_e32
922 ; GFX89-DAG: v_add_f16_e32 [[A_ADD:v[0-9]+]], 1.0, [[A]]
923 ; GFX89-DAG: v_add_f16_e32 [[B_ADD:v[0-9]+]], 2.0, [[B]]
924 ; GFX89-DAG: v_add_f16_e32 [[C_ADD:v[0-9]+]], 4.0, [[C]]
931 ; GFX9: v_med3_f16 v{{[0-9]+}}, [[A_ADD]], [[B_ADD]], [[C_ADD]]
932 define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #1 {
933 %tid = call i32 @llvm.amdgcn.workitem.id.x()
934 %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
935 %gep1 = getelementptr half, half addrspace(1)* %bptr, i32 %tid
936 %gep2 = getelementptr half, half addrspace(1)* %cptr, i32 %tid
937 %outgep = getelementptr half, half addrspace(1)* %out, i32 %tid
938 %a = load volatile half, half addrspace(1)* %gep0
939 %b = load volatile half, half addrspace(1)* %gep1
940 %c = load volatile half, half addrspace(1)* %gep2
942 %a.nnan = fadd nnan half %a, 1.0
943 %b.nnan = fadd nnan half %b, 2.0
944 %c.nnan = fadd nnan half %c, 4.0
946 %tmp0 = call half @llvm.minnum.f16(half %a.nnan, half %b.nnan) ; min(a', b')
947 %tmp1 = call half @llvm.maxnum.f16(half %a.nnan, half %b.nnan) ; max(a', b')
948 %tmp2 = call half @llvm.minnum.f16(half %tmp1, half %c.nnan) ; min(max(a', b'), c')
949 %med3 = call half @llvm.maxnum.f16(half %tmp0, half %tmp2) ; outer max
950 store half %med3, half addrspace(1)* %outgep
; Clamp min(max(a + 0.5, 8.0), 16.0) with the nnan flag on the fadd. The checks
; below expect separate v_max_f32 / v_min_f32 with the bounds as literals
; 0x41000000 (8.0) and 0x41800000 (16.0), i.e. no v_med3_f32 for this kernel.
954 ; GCN-LABEL: {{^}}two_non_inline_constant:
955 ; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 0.5,
956 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0x41000000, [[ADD]]
957 ; GCN: v_min_f32_e32 v{{[0-9]+}}, 0x41800000, [[MAX]]
958 define amdgpu_kernel void @two_non_inline_constant(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
959 %tid = call i32 @llvm.amdgcn.workitem.id.x()
960 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
961 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
962 %a = load float, float addrspace(1)* %gep0
963 %add = fadd nnan float %a, 0.5
964 %max = call float @llvm.maxnum.f32(float %add, float 8.0) ; lower bound 8.0
965 %med = call float @llvm.minnum.f32(float %max, float 16.0) ; upper bound 16.0
967 store float %med, float addrspace(1)* %out.gep
971 ; FIXME: Simple stores do not work as a multiple use because they are bitcasted to integer constants.
; Clamp min(max(a + 0.5, 1.0), 16.0) with the nnan flag on the fadd. The checks
; below expect 0x41800000 (16.0) to be materialized in an SGPR and one
; v_med3_f32 taking the add result, the literal 1.0 and that SGPR. The constant
; 16.0 gets an extra use through a separate fadd rather than a plain store (see
; the FIXME above).
972 ; GCN-LABEL: {{^}}one_non_inline_constant:
973 ; GCN-DAG: s_mov_b32 [[K1:s[0-9]+]], 0x41800000
974 ; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], 0.5,
975 ; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], 1.0, [[K1]]
976 define amdgpu_kernel void @one_non_inline_constant(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
977 %tid = call i32 @llvm.amdgcn.workitem.id.x()
978 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
979 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
980 %a = load float, float addrspace(1)* %gep0
981 %add = fadd nnan float %a, 0.5
982 %max = call float @llvm.maxnum.f32(float %add, float 1.0) ; lower bound 1.0
983 %med = call float @llvm.minnum.f32(float %max, float 16.0) ; upper bound 16.0
985 store float %med, float addrspace(1)* %out.gep
987 %extra.use = fadd float %a, 16.0 ; second use of the constant 16.0
988 store volatile float %extra.use, float addrspace(1)* undef
; Clamp min(max(a + 0.5, 8.0), 16.0) with the nnan flag on the fadd, where both
; bounds also have a second use through extra fadds. The checks below expect
; both constants in SGPRs, 0x41800000 (16.0) copied to a VGPR, and one
; v_med3_f32 taking the add result, the 8.0 SGPR and the 16.0 VGPR.
992 ; GCN-LABEL: {{^}}two_non_inline_constant_multi_use:
993 ; GCN-DAG: s_mov_b32 [[K1:s[0-9]+]], 0x41800000
994 ; GCN-DAG: s_mov_b32 [[K0:s[0-9]+]], 0x41000000
995 ; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], [[K1]]
996 ; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], 0.5,
997 ; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], [[K0]], [[VK1]]
998 define amdgpu_kernel void @two_non_inline_constant_multi_use(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
999 %tid = call i32 @llvm.amdgcn.workitem.id.x()
1000 %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
1001 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
1002 %a = load float, float addrspace(1)* %gep0
1003 %add = fadd nnan float %a, 0.5
1004 %max = call float @llvm.maxnum.f32(float %add, float 8.0) ; lower bound 8.0
1005 %med = call float @llvm.minnum.f32(float %max, float 16.0) ; upper bound 16.0
1007 store float %med, float addrspace(1)* %out.gep
1009 %extra.use0 = fadd float %a, 16.0 ; second use of the constant 16.0
1010 store volatile float %extra.use0, float addrspace(1)* undef
1011 %extra.use1 = fadd float %a, 8.0 ; second use of the constant 8.0
1012 store volatile float %extra.use1, float addrspace(1)* undef
; Intrinsic declarations used by the kernels in this file; all carry attribute
; group #0.
1016 declare i32 @llvm.amdgcn.workitem.id.x() #0
1017 declare float @llvm.fabs.f32(float) #0
1018 declare float @llvm.minnum.f32(float, float) #0
1019 declare float @llvm.maxnum.f32(float, float) #0
1020 declare double @llvm.minnum.f64(double, double) #0
1021 declare double @llvm.maxnum.f64(double, double) #0
1022 declare half @llvm.fabs.f16(half) #0
1023 declare half @llvm.minnum.f16(half, half) #0
1024 declare half @llvm.maxnum.f16(half, half) #0
; Attribute groups: #0 is for the intrinsic declarations; #1 and #2 are the two
; kernel configurations and differ only in "no-nans-fp-math" (false for #1,
; true for #2).
1026 attributes #0 = { nounwind readnone }
1027 attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
1028 attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }